Line data Source code
1 : //===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // Instrumentation-based profiling data is generated by instrumented
11 : // binaries through library functions in compiler-rt, and read by the clang
12 : // frontend to feed PGO.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #ifndef LLVM_PROFILEDATA_INSTRPROF_H
17 : #define LLVM_PROFILEDATA_INSTRPROF_H
18 :
19 : #include "llvm/ADT/ArrayRef.h"
20 : #include "llvm/ADT/STLExtras.h"
21 : #include "llvm/ADT/StringRef.h"
22 : #include "llvm/ADT/StringSet.h"
23 : #include "llvm/ADT/Triple.h"
24 : #include "llvm/IR/GlobalValue.h"
25 : #include "llvm/IR/ProfileSummary.h"
26 : #include "llvm/ProfileData/InstrProfData.inc"
27 : #include "llvm/Support/Compiler.h"
28 : #include "llvm/Support/Endian.h"
29 : #include "llvm/Support/Error.h"
30 : #include "llvm/Support/ErrorHandling.h"
31 : #include "llvm/Support/Host.h"
32 : #include "llvm/Support/MD5.h"
33 : #include "llvm/Support/MathExtras.h"
34 : #include "llvm/Support/raw_ostream.h"
35 : #include <algorithm>
36 : #include <cassert>
37 : #include <cstddef>
38 : #include <cstdint>
39 : #include <cstring>
40 : #include <list>
41 : #include <memory>
42 : #include <string>
43 : #include <system_error>
44 : #include <utility>
45 : #include <vector>
46 :
47 : namespace llvm {
48 :
49 : class Function;
50 : class GlobalVariable;
51 : struct InstrProfRecord;
52 : class InstrProfSymtab;
53 : class Instruction;
54 : class MDNode;
55 : class Module;
56 :
/// Profile section kinds. One enumerator is generated for each
/// INSTR_PROF_SECT_ENTRY record in InstrProfData.inc; only the record's
/// \c Kind field is used here, the remaining fields are discarded.
57 : enum InstrProfSectKind {
58 : #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
59 : #include "llvm/ProfileData/InstrProfData.inc"
60 : };
61 :
62 : /// Return the name of the profile section corresponding to \p IPSK.
63 : ///
64 : /// The name of the section depends on the object format type \p OF. If
65 : /// \p AddSegmentInfo is true, a segment prefix and additional linker hints may
66 : /// be added to the section name (this is the default).
67 : std::string getInstrProfSectionName(InstrProfSectKind IPSK,
68 : Triple::ObjectFormatType OF,
69 : bool AddSegmentInfo = true);
70 :
71 : /// Return the name profile runtime entry point to do value profiling
72 : /// for a given site.
73 : inline StringRef getInstrProfValueProfFuncName() {
74 : return INSTR_PROF_VALUE_PROF_FUNC_STR;
75 : }
76 :
77 : /// Return the name profile runtime entry point to do value range profiling.
78 : inline StringRef getInstrProfValueRangeProfFuncName() {
79 : return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR;
80 : }
81 :
82 : /// Return the name prefix of variables containing instrumented function names.
83 : inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
84 :
85 : /// Return the name prefix of variables containing per-function control data.
86 : inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
87 :
88 : /// Return the name prefix of profile counter variables.
89 : inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }
90 :
91 : /// Return the name prefix of value profile variables.
92 : inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }
93 :
94 : /// Return the name of value profile node array variables:
95 : inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }
96 :
97 : /// Return the name prefix of the COMDAT group for instrumentation variables
98 : /// associated with a COMDAT function.
99 : inline StringRef getInstrProfComdatPrefix() { return "__profv_"; }
100 :
101 : /// Return the name of the variable holding the strings (possibly compressed)
102 : /// of all function's PGO names.
103 : inline StringRef getInstrProfNamesVarName() {
104 : return "__llvm_prf_nm";
105 : }
106 :
107 : /// Return the name of a covarage mapping variable (internal linkage)
108 : /// for each instrumented source module. Such variables are allocated
109 : /// in the __llvm_covmap section.
110 : inline StringRef getCoverageMappingVarName() {
111 : return "__llvm_coverage_mapping";
112 : }
113 :
114 : /// Return the name of the internal variable recording the array
115 : /// of PGO name vars referenced by the coverage mapping. The owning
116 : /// functions of those names are not emitted by FE (e.g, unused inline
117 : /// functions.)
118 : inline StringRef getCoverageUnusedNamesVarName() {
119 : return "__llvm_coverage_names";
120 : }
121 :
122 : /// Return the name of function that registers all the per-function control
123 : /// data at program startup time by calling __llvm_register_function. This
124 : /// function has internal linkage and is called by __llvm_profile_init
125 : /// runtime method. This function is not generated for these platforms:
126 : /// Darwin, Linux, and FreeBSD.
127 : inline StringRef getInstrProfRegFuncsName() {
128 : return "__llvm_profile_register_functions";
129 : }
130 :
131 : /// Return the name of the runtime interface that registers per-function control
132 : /// data for one instrumented function.
133 : inline StringRef getInstrProfRegFuncName() {
134 : return "__llvm_profile_register_function";
135 : }
136 :
137 : /// Return the name of the runtime interface that registers the PGO name strings.
138 : inline StringRef getInstrProfNamesRegFuncName() {
139 : return "__llvm_profile_register_names_function";
140 : }
141 :
142 : /// Return the name of the runtime initialization method that is generated by
143 : /// the compiler. The function calls __llvm_profile_register_functions and
144 : /// __llvm_profile_override_default_filename functions if needed. This function
145 : /// has internal linkage and invoked at startup time via init_array.
146 : inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
147 :
148 : /// Return the name of the hook variable defined in profile runtime library.
149 : /// A reference to the variable causes the linker to link in the runtime
150 : /// initialization module (which defines the hook variable).
151 : inline StringRef getInstrProfRuntimeHookVarName() {
152 : return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
153 : }
154 :
155 : /// Return the name of the compiler generated function that references the
156 : /// runtime hook variable. The function is a weak global.
157 : inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
158 : return "__llvm_profile_runtime_user";
159 : }
160 :
161 : /// Return the marker used to separate PGO names during serialization.
162 : inline StringRef getInstrProfNameSeparator() { return "\01"; }
163 :
164 : /// Return the modified name for function \c F suitable to be
165 : /// used as the key for profile lookup. Variable \c InLTO indicates if this
166 : /// is called in LTO optimization passes.
167 : std::string getPGOFuncName(const Function &F, bool InLTO = false,
168 : uint64_t Version = INSTR_PROF_INDEX_VERSION);
169 :
170 : /// Return the modified name for a function suitable to be
171 : /// used as the key for profile lookup. The function's original
172 : /// name is \c RawFuncName and has linkage of type \c Linkage.
173 : /// The function is defined in module \c FileName.
174 : std::string getPGOFuncName(StringRef RawFuncName,
175 : GlobalValue::LinkageTypes Linkage,
176 : StringRef FileName,
177 : uint64_t Version = INSTR_PROF_INDEX_VERSION);
178 :
179 : /// Return the name of the global variable used to store a function
180 : /// name in PGO instrumentation. \c FuncName is the name of the function
181 : /// returned by the \c getPGOFuncName call.
182 : std::string getPGOFuncNameVarName(StringRef FuncName,
183 : GlobalValue::LinkageTypes Linkage);
184 :
185 : /// Create and return the global variable for function name used in PGO
186 : /// instrumentation. \c PGOFuncName is the name of the function returned
187 : /// by the \c getPGOFuncName call.
188 : GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);
189 :
190 : /// Create and return the global variable for function name used in PGO
191 : /// instrumentation. \c PGOFuncName is the name of the function
192 : /// returned by the \c getPGOFuncName call, \c M is the owning module,
193 : /// and \c Linkage is the linkage of the instrumented function.
194 : GlobalVariable *createPGOFuncNameVar(Module &M,
195 : GlobalValue::LinkageTypes Linkage,
196 : StringRef PGOFuncName);
197 :
198 : /// Return the initializer in string of the PGO name var \c NameVar.
199 : StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
200 :
201 : /// Given a PGO function name, remove the filename prefix and return
202 : /// the original (static) function name.
203 : StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
204 : StringRef FileName = "<unknown>");
205 :
206 : /// Given a vector of strings (function PGO names) \c NameStrs, the
207 : /// method generates a combined string \c Result that is ready to be
208 : /// serialized. The \c Result string is comprised of three fields:
209 : /// The first field is the length of the uncompressed strings, and
210 : /// the second field is the length of the zlib-compressed string.
211 : /// Both fields are encoded in ULEB128. If \c doCompression is false, the
212 : /// third field is the uncompressed strings; otherwise it is the
213 : /// compressed string. When the string compression is off, the
214 : /// second field will have value zero.
215 : Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
216 : bool doCompression, std::string &Result);
217 :
218 : /// Produce \c Result string with the same format described above. The input
219 : /// is vector of PGO function name variables that are referenced.
220 : Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
221 : std::string &Result, bool doCompression = true);
222 :
223 : /// \c NameStrings is a string composed of one or more sub-strings encoded in
224 : /// the format described above. The substrings are separated by 0 or more zero
225 : /// bytes. This method decodes the string and populates the \c Symtab.
226 : Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
227 :
228 : /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
229 : /// set in IR PGO compilation.
230 : bool isIRPGOFlagSet(const Module *M);
231 :
232 : /// Check if we can safely rename this Comdat function. Instances of the same
233 : /// comdat function may have different control flows thus can not share the
234 : /// same counter variable.
235 : bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
236 :
/// Value profile kinds, with \c uint32_t as the underlying type. Each
/// enumerator and its numeric value come from a VALUE_PROF_KIND record in
/// InstrProfData.inc.
237 : enum InstrProfValueKind : uint32_t {
238 : #define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
239 : #include "llvm/ProfileData/InstrProfData.inc"
240 : };
241 :
242 : /// Get the value profile data for value site \p SiteIndx from \p InstrProfR
243 : /// and annotate the instruction \p Inst with the value profile meta data.
244 : /// Annotate up to \p MaxMDCount (default 3) number of records per value site.
245 : void annotateValueSite(Module &M, Instruction &Inst,
246 : const InstrProfRecord &InstrProfR,
247 : InstrProfValueKind ValueKind, uint32_t SiteIndx,
248 : uint32_t MaxMDCount = 3);
249 :
250 : /// Same as the above interface but using an ArrayRef, as well as \p Sum.
251 : void annotateValueSite(Module &M, Instruction &Inst,
252 : ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
253 : InstrProfValueKind ValueKind, uint32_t MaxMDCount);
254 :
255 : /// Extract the value profile data from \p Inst which is annotated with
256 : /// value profile meta data. Return false if there is no value data annotated,
257 : /// otherwise return true.
258 : bool getValueProfDataFromInst(const Instruction &Inst,
259 : InstrProfValueKind ValueKind,
260 : uint32_t MaxNumValueData,
261 : InstrProfValueData ValueData[],
262 : uint32_t &ActualNumValueData, uint64_t &TotalC);
263 :
264 : inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
265 :
266 : /// Return the PGOFuncName meta data associated with a function.
267 : MDNode *getPGOFuncNameMetadata(const Function &F);
268 :
269 : /// Create the PGOFuncName meta data if PGOFuncName is different from the
270 : /// function's raw name. This should apply only to internal linkage functions
271 : /// declared by users.
272 : void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
273 :
274 : /// Check if we can use Comdat for profile variables. This will eliminate
275 : /// the duplicated profile variables for Comdat functions.
276 : bool needsComdatForCounter(const Function &F, const Module &M);
277 :
278 : const std::error_category &instrprof_category();
279 :
/// Error codes used by the instrumentation profile support code.
/// \c success is explicitly 0; every later enumerator takes the next
/// consecutive value, which is what make_error_code() stores in the
/// resulting std::error_code.
280 : enum class instrprof_error {
281 : success = 0,
282 : eof,
283 : unrecognized_format,
284 : bad_magic,
285 : bad_header,
286 : unsupported_version,
287 : unsupported_hash_type,
288 : too_large,
289 : truncated,
290 : malformed,
291 : unknown_function,
292 : hash_mismatch,
293 : count_mismatch,
294 : counter_overflow,
295 : value_site_count_mismatch,
296 : compress_failed,
297 : uncompress_failed,
298 : empty_raw_profile,
299 : zlib_unavailable
300 : };
301 :
302 : inline std::error_code make_error_code(instrprof_error E) {
303 0 : return std::error_code(static_cast<int>(E), instrprof_category());
304 : }
305 :
306 : class InstrProfError : public ErrorInfo<InstrProfError> {
307 : public:
308 504 : InstrProfError(instrprof_error Err) : Err(Err) {
309 : assert(Err != instrprof_error::success && "Not an error");
310 : }
311 :
312 : std::string message() const override;
313 :
314 28 : void log(raw_ostream &OS) const override { OS << message(); }
315 :
316 0 : std::error_code convertToErrorCode() const override {
317 0 : return make_error_code(Err);
318 : }
319 :
320 0 : instrprof_error get() const { return Err; }
321 :
322 : /// Consume an Error and return the raw enum value contained within it. The
323 : /// Error must either be a success value, or contain a single InstrProfError.
324 449 : static instrprof_error take(Error E) {
325 449 : auto Err = instrprof_error::success;
326 898 : handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
327 : assert(Err == instrprof_error::success && "Multiple errors encountered");
328 449 : Err = IPE.get();
329 : });
330 449 : return Err;
331 : }
332 :
333 : static char ID;
334 :
335 : private:
336 : instrprof_error Err;
337 : };
338 :
339 : class SoftInstrProfErrors {
340 : /// Count the number of soft instrprof_errors encountered and keep track of
341 : /// the first such error for reporting purposes.
342 :
343 : /// The first soft error encountered.
344 : instrprof_error FirstError = instrprof_error::success;
345 :
346 : /// The number of hash mismatches.
347 : unsigned NumHashMismatches = 0;
348 :
349 : /// The number of count mismatches.
350 : unsigned NumCountMismatches = 0;
351 :
352 : /// The number of counter overflows.
353 : unsigned NumCounterOverflows = 0;
354 :
355 : /// The number of value site count mismatches.
356 : unsigned NumValueSiteCountMismatches = 0;
357 :
358 : public:
359 : SoftInstrProfErrors() = default;
360 :
361 : ~SoftInstrProfErrors() {
362 : assert(FirstError == instrprof_error::success &&
363 : "Unchecked soft error encountered");
364 : }
365 :
366 : /// Track a soft error (\p IE) and increment its associated counter.
367 : void addError(instrprof_error IE);
368 :
369 : /// Get the number of hash mismatches.
370 : unsigned getNumHashMismatches() const { return NumHashMismatches; }
371 :
372 : /// Get the number of count mismatches.
373 : unsigned getNumCountMismatches() const { return NumCountMismatches; }
374 :
375 : /// Get the number of counter overflows.
376 : unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
377 :
378 : /// Get the number of value site count mismatches.
379 : unsigned getNumValueSiteCountMismatches() const {
380 : return NumValueSiteCountMismatches;
381 : }
382 :
383 : /// Return the first encountered error and reset FirstError to a success
384 : /// value.
385 : Error takeError() {
386 : if (FirstError == instrprof_error::success)
387 : return Error::success();
388 : auto E = make_error<InstrProfError>(FirstError);
389 : FirstError = instrprof_error::success;
390 : return E;
391 : }
392 : };
393 :
394 : namespace object {
395 :
396 : class SectionRef;
397 :
398 : } // end namespace object
399 :
400 : namespace IndexedInstrProf {
401 :
402 : uint64_t ComputeHash(StringRef K);
403 :
404 : } // end namespace IndexedInstrProf
405 :
406 : /// A symbol table used for function PGO name look-up with keys
407 : /// (such as pointers, md5hash values) to the function. A function's
408 : /// PGO name or name's md5hash are used in retrieving the profile
409 : /// data of the function. See \c getPGOFuncName() method for details
410 : /// on how PGO name is formed.
411 : class InstrProfSymtab {
412 : public:
413 : using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
414 :
415 : private:
416 : StringRef Data;
417 : uint64_t Address = 0;
418 : // Unique name strings.
419 : StringSet<> NameTab;
420 : // A map from MD5 keys to function name strings.
421 : std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
422 : // A map from MD5 keys to function define. We only populate this map
423 : // when build the Symtab from a Module.
424 : std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
425 : // A map from function runtime address to function name MD5 hash.
426 : // This map is only populated and used by raw instr profile reader.
427 : AddrHashMap AddrToMD5Map;
428 : bool Sorted = false;
429 :
430 : static StringRef getExternalSymbol() {
431 : return "** External Symbol **";
432 : }
433 :
434 : // If the symtab is created by a series of calls to \c addFuncName, \c
435 : // finalizeSymtab needs to be called before looking up function names.
436 : // This is required because the underlying map is a vector (for space
437 : // efficiency) which needs to be sorted.
438 : inline void finalizeSymtab();
439 :
440 : public:
441 2008 : InstrProfSymtab() = default;
442 :
443 : /// Create InstrProfSymtab from an object file section which
444 : /// contains function PGO names. When section may contain raw
445 : /// string data or string data in compressed form. This method
446 : /// only initialize the symtab with reference to the data and
447 : /// the section base address. The decompression will be delayed
448 : /// until before it is used. See also \c create(StringRef) method.
449 : Error create(object::SectionRef &Section);
450 :
451 : /// This interface is used by reader of CoverageMapping test
452 : /// format.
453 : inline Error create(StringRef D, uint64_t BaseAddr);
454 :
455 : /// \c NameStrings is a string composed of one of more sub-strings
456 : /// encoded in the format described in \c collectPGOFuncNameStrings.
457 : /// This method is a wrapper to \c readPGOFuncNameStrings method.
458 : inline Error create(StringRef NameStrings);
459 :
460 : /// A wrapper interface to populate the PGO symtab with functions
461 : /// decls from module \c M. This interface is used by transformation
462 : /// passes such as indirect function call promotion. Variable \c InLTO
463 : /// indicates if this is called from LTO optimization passes.
464 : Error create(Module &M, bool InLTO = false);
465 :
466 : /// Create InstrProfSymtab from a set of names iteratable from
467 : /// \p IterRange. This interface is used by IndexedProfReader.
468 : template <typename NameIterRange> Error create(const NameIterRange &IterRange);
469 :
470 : /// Update the symtab by adding \p FuncName to the table. This interface
471 : /// is used by the raw and text profile readers.
472 2486 : Error addFuncName(StringRef FuncName) {
473 2486 : if (FuncName.empty())
474 : return make_error<InstrProfError>(instrprof_error::malformed);
475 : auto Ins = NameTab.insert(FuncName);
476 2483 : if (Ins.second) {
477 2377 : MD5NameMap.push_back(std::make_pair(
478 : IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
479 2377 : Sorted = false;
480 : }
481 : return Error::success();
482 : }
483 :
484 : /// Map a function address to its name's MD5 hash. This interface
485 : /// is only used by the raw profiler reader.
486 : void mapAddress(uint64_t Addr, uint64_t MD5Val) {
487 38 : AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
488 : }
489 :
490 : /// Return a function's hash, or 0, if the function isn't in this SymTab.
491 : uint64_t getFunctionHashFromAddress(uint64_t Address);
492 :
493 : /// Return function's PGO name from the function name's symbol
494 : /// address in the object file. If an error occurs, return
495 : /// an empty string.
496 : StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
497 :
498 : /// Return function's PGO name from the name's md5 hash value.
499 : /// If not found, return an empty string.
500 : inline StringRef getFuncName(uint64_t FuncMD5Hash);
501 :
502 : /// Just like getFuncName, except that it will return a non-empty StringRef
503 : /// if the function is external to this symbol table. All such cases
504 : /// will be represented using the same StringRef value.
505 : inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
506 :
507 : /// True if Symbol is the value used to represent external symbols.
508 : static bool isExternalSymbol(const StringRef &Symbol) {
509 : return Symbol == InstrProfSymtab::getExternalSymbol();
510 : }
511 :
512 : /// Return function from the name's md5 hash. Return nullptr if not found.
513 : inline Function *getFunction(uint64_t FuncMD5Hash);
514 :
515 : /// Return the function's original assembly name by stripping off
516 : /// the prefix attached (to symbols with priviate linkage). For
517 : /// global functions, it returns the same string as getFuncName.
518 : inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
519 :
520 : /// Return the name section data.
521 0 : inline StringRef getNameData() const { return Data; }
522 : };
523 :
524 0 : Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
525 98 : Data = D;
526 98 : Address = BaseAddr;
527 0 : return Error::success();
528 : }
529 :
530 : Error InstrProfSymtab::create(StringRef NameStrings) {
531 97 : return readPGOFuncNameStrings(NameStrings, *this);
532 : }
533 :
534 : template <typename NameIterRange>
535 6 : Error InstrProfSymtab::create(const NameIterRange &IterRange) {
536 27 : for (auto Name : IterRange)
537 42 : if (Error E = addFuncName(Name))
538 : return E;
539 :
540 6 : finalizeSymtab();
541 : return Error::success();
542 : }
543 :
544 1562 : void InstrProfSymtab::finalizeSymtab() {
545 1562 : if (Sorted)
546 : return;
547 : llvm::sort(MD5NameMap, less_first());
548 : llvm::sort(MD5FuncMap, less_first());
549 : llvm::sort(AddrToMD5Map, less_first());
550 988 : AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
551 988 : AddrToMD5Map.end());
552 988 : Sorted = true;
553 : }
554 :
555 : StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) {
556 12 : StringRef ret = getFuncName(FuncMD5Hash);
557 12 : if (ret.empty())
558 : return InstrProfSymtab::getExternalSymbol();
559 9 : return ret;
560 : }
561 :
562 599 : StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
563 599 : finalizeSymtab();
564 : auto Result =
565 : std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash,
566 : [](const std::pair<uint64_t, std::string> &LHS,
567 599 : uint64_t RHS) { return LHS.first < RHS; });
568 599 : if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
569 592 : return Result->second;
570 7 : return StringRef();
571 : }
572 :
573 55 : Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
574 55 : finalizeSymtab();
575 : auto Result =
576 : std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash,
577 : [](const std::pair<uint64_t, Function*> &LHS,
578 140 : uint64_t RHS) { return LHS.first < RHS; });
579 55 : if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
580 52 : return Result->second;
581 : return nullptr;
582 : }
583 :
584 : // See also getPGOFuncName implementation. These two need to be
585 : // matched.
586 24 : StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
587 24 : StringRef PGOName = getFuncName(FuncMD5Hash);
588 24 : size_t S = PGOName.find_first_of(':');
589 24 : if (S == StringRef::npos)
590 12 : return PGOName;
591 12 : return PGOName.drop_front(S + 1);
592 : }
593 :
594 300 : struct InstrProfValueSiteRecord {
595 : /// Value profiling data pairs at a given value site.
596 : std::list<InstrProfValueData> ValueData;
597 :
598 : InstrProfValueSiteRecord() { ValueData.clear(); }
599 : template <class InputIterator>
600 : InstrProfValueSiteRecord(InputIterator F, InputIterator L)
601 143 : : ValueData(F, L) {}
602 :
603 : /// Sort ValueData ascending by Value
604 : void sortByTargetValues() {
605 16 : ValueData.sort(
606 : [](const InstrProfValueData &left, const InstrProfValueData &right) {
607 4108 : return left.Value < right.Value;
608 : });
609 : }
610 : /// Sort ValueData Descending by Count
611 : inline void sortByCount();
612 :
613 : /// Merge data from another InstrProfValueSiteRecord
614 : /// Optionally scale merged counts by \p Weight.
615 : void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
616 : function_ref<void(instrprof_error)> Warn);
617 : /// Scale up value profile data counts.
618 : void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
619 : };
620 :
621 : /// Profiling information for a single function.
622 767 : struct InstrProfRecord {
623 : std::vector<uint64_t> Counts;
624 :
625 : InstrProfRecord() = default;
626 997 : InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {}
627 184 : InstrProfRecord(InstrProfRecord &&) = default;
628 683 : InstrProfRecord(const InstrProfRecord &RHS)
629 1366 : : Counts(RHS.Counts),
630 : ValueData(RHS.ValueData
631 : ? llvm::make_unique<ValueProfData>(*RHS.ValueData)
632 683 : : nullptr) {}
633 : InstrProfRecord &operator=(InstrProfRecord &&) = default;
634 218 : InstrProfRecord &operator=(const InstrProfRecord &RHS) {
635 218 : Counts = RHS.Counts;
636 218 : if (!RHS.ValueData) {
637 : ValueData = nullptr;
638 216 : return *this;
639 : }
640 2 : if (!ValueData)
641 4 : ValueData = llvm::make_unique<ValueProfData>(*RHS.ValueData);
642 : else
643 : *ValueData = *RHS.ValueData;
644 : return *this;
645 : }
646 :
647 : /// Return the number of value profile kinds with non-zero number
648 : /// of profile sites.
649 : inline uint32_t getNumValueKinds() const;
650 : /// Return the number of instrumented sites for ValueKind.
651 : inline uint32_t getNumValueSites(uint32_t ValueKind) const;
652 :
653 : /// Return the total number of ValueData for ValueKind.
654 : inline uint32_t getNumValueData(uint32_t ValueKind) const;
655 :
656 : /// Return the number of value data collected for ValueKind at profiling
657 : /// site: Site.
658 : inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
659 : uint32_t Site) const;
660 :
661 : /// Return the array of profiled values at \p Site. If \p TotalC
662 : /// is not null, the total count of all target values at this site
663 : /// will be stored in \c *TotalC.
664 : inline std::unique_ptr<InstrProfValueData[]>
665 : getValueForSite(uint32_t ValueKind, uint32_t Site,
666 : uint64_t *TotalC = nullptr) const;
667 :
668 : /// Get the target value/counts of kind \p ValueKind collected at site
669 : /// \p Site and store the result in array \p Dest. Return the total
670 : /// counts of all target values at this site.
671 : inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind,
672 : uint32_t Site) const;
673 :
674 : /// Reserve space for NumValueSites sites.
675 : inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
676 :
677 : /// Add ValueData for ValueKind at value Site.
678 : void addValueData(uint32_t ValueKind, uint32_t Site,
679 : InstrProfValueData *VData, uint32_t N,
680 : InstrProfSymtab *SymTab);
681 :
682 : /// Merge the counts in \p Other into this one.
683 : /// Optionally scale merged counts by \p Weight.
684 : void merge(InstrProfRecord &Other, uint64_t Weight,
685 : function_ref<void(instrprof_error)> Warn);
686 :
687 : /// Scale up profile counts (including value profile data) by
688 : /// \p Weight.
689 : void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
690 :
691 : /// Sort value profile data (per site) by count.
692 583 : void sortValueData() {
693 1749 : for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
694 1239 : for (auto &SR : getValueSitesForKind(Kind))
695 73 : SR.sortByCount();
696 583 : }
697 :
698 : /// Clear value data entries and edge counters.
699 : void Clear() {
700 : Counts.clear();
701 : clearValueData();
702 : }
703 :
704 : /// Clear value data entries
705 : void clearValueData() { ValueData = nullptr; }
706 :
707 : private:
708 22 : struct ValueProfData {
709 : std::vector<InstrProfValueSiteRecord> IndirectCallSites;
710 : std::vector<InstrProfValueSiteRecord> MemOPSizes;
711 : };
712 : std::unique_ptr<ValueProfData> ValueData;
713 :
714 : MutableArrayRef<InstrProfValueSiteRecord>
715 : getValueSitesForKind(uint32_t ValueKind) {
716 : // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
717 : // implemented in LLVM) to call the const overload of this function, then
718 : // cast away the constness from the result.
719 : auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
720 1192 : ValueKind);
721 : return makeMutableArrayRef(
722 1192 : const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
723 : }
724 : ArrayRef<InstrProfValueSiteRecord>
725 5632 : getValueSitesForKind(uint32_t ValueKind) const {
726 5632 : if (!ValueData)
727 4905 : return None;
728 727 : switch (ValueKind) {
729 542 : case IPVK_IndirectCallTarget:
730 542 : return ValueData->IndirectCallSites;
731 185 : case IPVK_MemOPSize:
732 185 : return ValueData->MemOPSizes;
733 0 : default:
734 0 : llvm_unreachable("Unknown value kind!");
735 : }
736 : }
737 :
738 : std::vector<InstrProfValueSiteRecord> &
739 267 : getOrCreateValueSitesForKind(uint32_t ValueKind) {
740 267 : if (!ValueData)
741 130 : ValueData = llvm::make_unique<ValueProfData>();
742 267 : switch (ValueKind) {
743 246 : case IPVK_IndirectCallTarget:
744 246 : return ValueData->IndirectCallSites;
745 21 : case IPVK_MemOPSize:
746 21 : return ValueData->MemOPSizes;
747 0 : default:
748 0 : llvm_unreachable("Unknown value kind!");
749 : }
750 : }
751 :
752 : // Map indirect call target name hash to name string.
753 : uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
754 : InstrProfSymtab *SymTab);
755 :
756 : // Merge Value Profile data from Src record to this record for ValueKind.
757 : // Scale merged value counts by \p Weight.
758 : void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src,
759 : uint64_t Weight,
760 : function_ref<void(instrprof_error)> Warn);
761 :
762 : // Scale up value profile data count.
763 : void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
764 : function_ref<void(instrprof_error)> Warn);
765 : };
766 :
767 1912 : struct NamedInstrProfRecord : InstrProfRecord {
768 : StringRef Name;
769 : uint64_t Hash;
770 :
771 : NamedInstrProfRecord() = default;
772 : NamedInstrProfRecord(StringRef Name, uint64_t Hash,
773 : std::vector<uint64_t> Counts)
774 2376 : : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
775 : };
776 :
777 : uint32_t InstrProfRecord::getNumValueKinds() const {
778 : uint32_t NumValueKinds = 0;
779 1359 : for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
780 906 : NumValueKinds += !(getValueSitesForKind(Kind).empty());
781 : return NumValueKinds;
782 : }
783 :
784 42 : uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
785 : uint32_t N = 0;
786 164 : for (auto &SR : getValueSitesForKind(ValueKind))
787 122 : N += SR.ValueData.size();
788 42 : return N;
789 : }
790 :
791 : uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
792 2957 : return getValueSitesForKind(ValueKind).size();
793 : }
794 :
795 : uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
796 : uint32_t Site) const {
797 229 : return getValueSitesForKind(ValueKind)[Site].ValueData.size();
798 : }
799 :
800 : std::unique_ptr<InstrProfValueData[]>
801 69 : InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
802 : uint64_t *TotalC) const {
803 : uint64_t Dummy;
804 69 : uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC);
805 : uint32_t N = getNumValueDataForSite(ValueKind, Site);
806 69 : if (N == 0) {
807 8 : TotalCount = 0;
808 : return std::unique_ptr<InstrProfValueData[]>(nullptr);
809 : }
810 :
811 61 : auto VD = llvm::make_unique<InstrProfValueData[]>(N);
812 61 : TotalCount = getValueForSite(VD.get(), ValueKind, Site);
813 :
814 : return VD;
815 : }
816 :
817 132 : uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
818 : uint32_t ValueKind,
819 : uint32_t Site) const {
820 : uint32_t I = 0;
821 : uint64_t TotalCount = 0;
822 1529 : for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
823 1397 : Dest[I].Value = V.Value;
824 1397 : Dest[I].Count = V.Count;
825 : TotalCount = SaturatingAdd(TotalCount, V.Count);
826 1397 : I++;
827 : }
828 132 : return TotalCount;
829 : }
830 :
831 47 : void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
832 47 : if (!NumValueSites)
833 : return;
834 71 : getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites);
835 : }
836 :
837 : inline support::endianness getHostEndianness() {
838 : return sys::IsLittleEndianHost ? support::little : support::big;
839 : }
840 :
841 : // Include definitions for value profile data
842 : #define INSTR_PROF_VALUE_PROF_DATA
843 : #include "llvm/ProfileData/InstrProfData.inc"
844 :
845 73 : void InstrProfValueSiteRecord::sortByCount() {
846 73 : ValueData.sort(
847 : [](const InstrProfValueData &left, const InstrProfValueData &right) {
848 6752 : return left.Count > right.Count;
849 : });
850 : // Now truncate
851 : size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
852 73 : if (ValueData.size() > max_s)
853 2 : ValueData.resize(max_s);
854 73 : }
855 :
856 : namespace IndexedInstrProf {
857 :
/// Hash algorithms selectable through ComputeHash. \c Last aliases the
/// final enumerator.
enum class HashT : uint32_t {
  MD5 = 0,
  Last = MD5
};
862 :
863 : inline uint64_t ComputeHash(HashT Type, StringRef K) {
864 790 : switch (Type) {
865 790 : case HashT::MD5:
866 4326 : return MD5Hash(K);
867 : }
868 0 : llvm_unreachable("Unhandled hash type");
869 : }
870 :
// Magic number of the indexed profile format. Read from the least
// significant byte upwards, the bytes spell the string in the trailing
// comment.
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
872 :
// Versions of the indexed profile format.
enum ProfVersion {
  // Version 1 is the first version. In this version, the value of
  // a key/value pair can only include profile data of a single function.
  // Due to this restriction, the number of block counters for a given
  // function is not recorded but derived from the length of the value.
  Version1 = 1,
  // The version 2 format supports recording profile data of multiple
  // functions which share the same key in one value field. To support this,
  // the number of block counters is recorded as a uint64_t field right after
  // the function structural hash.
  Version2 = 2,
  // Version 3 supports value profile data. The value profile data is expected
  // to follow the block counter profile data.
  Version3 = 3,
  // In this version, profile summary data \c IndexedInstrProf::Summary is
  // stored after the profile header.
  Version4 = 4,
  // In this version, the frontend PGO stable hash algorithm defaults to V2.
  Version5 = 5,
  // The current version is 5. The value itself comes from
  // INSTR_PROF_INDEX_VERSION, which is defined outside this header.
  CurrentVersion = INSTR_PROF_INDEX_VERSION
};
// The indexed format version, taken from ProfVersion::CurrentVersion.
const uint64_t Version = ProfVersion::CurrentVersion;

// The hash type used by the single-argument ComputeHash overload.
const HashT HashType = HashT::MD5;
898 :
899 3133 : inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
900 :
// This structure defines the file header of the LLVM profile
// data file in indexed-format. Every field is a 64-bit unsigned integer.
struct Header {
  uint64_t Magic;
  uint64_t Version;
  uint64_t Unused; // Becomes unused since version 4
  uint64_t HashType;
  uint64_t HashOffset;
};
910 :
911 : // Profile summary data recorded in the profile data file in indexed
912 : // format. It is introduced in version 4. The summary data follows
913 : // right after the profile file header.
914 : struct Summary {
915 : struct Entry {
916 : uint64_t Cutoff; ///< The required percentile of total execution count.
917 : uint64_t
918 : MinBlockCount; ///< The minimum execution count for this percentile.
919 : uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count.
920 : };
921 : // The field kind enumerator to assigned value mapping should remain
922 : // unchanged when a new kind is added or an old kind gets deleted in
923 : // the future.
924 : enum SummaryFieldKind {
925 : /// The total number of functions instrumented.
926 : TotalNumFunctions = 0,
927 : /// Total number of instrumented blocks/edges.
928 : TotalNumBlocks = 1,
929 : /// The maximal execution count among all functions.
930 : /// This field does not exist for profile data from IR based
931 : /// instrumentation.
932 : MaxFunctionCount = 2,
933 : /// Max block count of the program.
934 : MaxBlockCount = 3,
935 : /// Max internal block count of the program (excluding entry blocks).
936 : MaxInternalBlockCount = 4,
937 : /// The sum of all instrumented block counts.
938 : TotalBlockCount = 5,
939 : NumKinds = TotalBlockCount + 1
940 : };
941 :
942 : // The number of summmary fields following the summary header.
943 : uint64_t NumSummaryFields;
944 : // The number of Cutoff Entries (Summary::Entry) following summary fields.
945 : uint64_t NumCutoffEntries;
946 :
947 : Summary() = delete;
948 601 : Summary(uint32_t Size) { memset(this, 0, Size); }
949 :
950 601 : void operator delete(void *ptr) { ::operator delete(ptr); }
951 :
952 : static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
953 601 : return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
954 601 : NumSumFields * sizeof(uint64_t);
955 : }
956 :
957 : const uint64_t *getSummaryDataBase() const {
958 : return reinterpret_cast<const uint64_t *>(this + 1);
959 : }
960 :
961 : uint64_t *getSummaryDataBase() {
962 : return reinterpret_cast<uint64_t *>(this + 1);
963 : }
964 :
965 : const Entry *getCutoffEntryBase() const {
966 : return reinterpret_cast<const Entry *>(
967 6038 : &getSummaryDataBase()[NumSummaryFields]);
968 : }
969 :
970 : Entry *getCutoffEntryBase() {
971 : return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
972 : }
973 :
974 : uint64_t get(SummaryFieldKind K) const {
975 2238 : return getSummaryDataBase()[K];
976 : }
977 :
978 : void set(SummaryFieldKind K, uint64_t V) {
979 1368 : getSummaryDataBase()[K] = V;
980 : }
981 :
982 6038 : const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }
983 :
984 : void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
985 3648 : Entry &ER = getCutoffEntryBase()[I];
986 3648 : ER.Cutoff = E.Cutoff;
987 3648 : ER.MinBlockCount = E.MinCount;
988 3648 : ER.NumBlocks = E.NumCounts;
989 : }
990 : };
991 :
992 601 : inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
993 601 : return std::unique_ptr<Summary>(new (::operator new(TotalSize))
994 601 : Summary(TotalSize));
995 : }
996 :
997 : } // end namespace IndexedInstrProf
998 :
999 : namespace RawInstrProf {
1000 :
// Raw profile format version history:
// Version 1: First version
// Version 2: Added value profile data section. Per-function control data
// struct has more fields to describe value profile information.
// Version 3: Compressed name section support. Function PGO name reference
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
// The value itself comes from INSTR_PROF_RAW_VERSION, which is defined
// outside this header.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
1009 :
1010 : template <class IntPtrT> inline uint64_t getMagic();
1011 : template <> inline uint64_t getMagic<uint64_t>() {
1012 : return INSTR_PROF_RAW_MAGIC_64;
1013 : }
1014 :
1015 : template <> inline uint64_t getMagic<uint32_t>() {
1016 : return INSTR_PROF_RAW_MAGIC_32;
1017 : }
1018 :
// Per-function profile data header/control structure.
// The definition should match the structure defined in
// compiler-rt/lib/profile/InstrProfiling.h.
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
//
// The members are generated: INSTR_PROF_DATA is defined so that each entry
// it is applied to in InstrProfData.inc becomes one `Type Name;` field.
template <class IntPtrT> struct alignas(8) ProfileData {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
1028 :
// File header structure of the LLVM profile data in raw format.
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c and
// InstrProfilingBuffer.c.
//
// The members are generated: INSTR_PROF_RAW_HEADER is defined so that each
// entry it is applied to in InstrProfData.inc becomes one `const Type Name;`
// field.
struct Header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
1037 :
1038 : } // end namespace RawInstrProf
1039 :
// Parse MemOP Size range option.
// \p Str is the option text; \p RangeStart and \p RangeLast are passed by
// non-const reference.
// NOTE(review): presumably the parsed bounds are returned through
// \p RangeStart and \p RangeLast; the accepted syntax and the behavior on
// malformed input are defined out of line; confirm against the
// implementation.
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                 int64_t &RangeLast);
1043 :
1044 : } // end namespace llvm
1045 :
1046 : #endif // LLVM_PROFILEDATA_INSTRPROF_H
|