Go to the documentation of this file.
68 for (
int K = 0; K < NItems; K++) {
69 FDOStream.
seek(
P[K].Pos);
70 for (
int I = 0;
I <
P[K].N;
I++)
76 FDOStream.
seek(LastPos);
79 std::string &
Data = SOStream.
str();
80 for (
int K = 0; K < NItems; K++) {
81 for (
int I = 0;
I <
P[K].N;
I++) {
82 uint64_t Bytes = endian::byte_swap<uint64_t, little>(
P[K].
D[
I]);
84 (
const char *)&Bytes,
sizeof(
uint64_t));
118 static std::pair<offset_type, offset_type>
128 for (
const auto &ProfileData : *V) {
139 return std::make_pair(
N,
M);
150 for (
const auto &ProfileData : *V) {
163 std::unique_ptr<ValueProfData> VDataPtr =
164 ValueProfData::serializeFrom(ProfileData.second);
166 VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
167 Out.
write((
const char *)VDataPtr.get(),
S);
186 this->Sparse = Sparse;
200 auto Name =
Other.Name;
201 auto Hash =
Other.Hash;
202 Other.accumulateCounts(FuncLevelOverlap.
Test);
203 if (FunctionData.find(Name) == FunctionData.end()) {
211 auto &ProfileDataMap = FunctionData[Name];
214 std::tie(Where, NewFunc) =
226 Dest.
overlap(
Other, Overlap, FuncLevelOverlap, ValueCutoff);
232 auto &ProfileDataMap = FunctionData[Name];
236 std::tie(Where, NewFunc) =
241 Warn(make_error<InstrProfError>(
E));
248 Dest.
scale(Weight, 1, MapWarn);
251 Dest.
merge(
I, Weight, MapWarn);
259 auto Result = MemProfRecordData.insert({
Id,
Record});
271 auto Result = MemProfFrameData.insert({
Id, Frame});
276 if (!Result.second && Result.first->second != Frame) {
278 "frame to id mapping mismatch"));
290 for (
auto &
I : IPW.FunctionData)
291 for (
auto &Func :
I.getValue())
294 BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
295 for (
auto &
I : IPW.BinaryIds)
298 MemProfFrameData.reserve(IPW.MemProfFrameData.size());
299 for (
auto &
I : IPW.MemProfFrameData) {
306 MemProfRecordData.reserve(IPW.MemProfRecordData.size());
307 for (
auto &
I : IPW.MemProfRecordData) {
312 bool InstrProfWriter::shouldEncodeData(
const ProfilingData &
PD) {
315 for (
const auto &Func :
PD) {
325 using namespace IndexedInstrProf;
336 for (
unsigned I = 0;
I < Res.size();
I++)
341 using namespace IndexedInstrProf;
347 InfoObj->SummaryBuilder = &ISB;
349 InfoObj->CSSummaryBuilder = &CSISB;
352 for (
const auto &
I : FunctionData)
353 if (shouldEncodeData(
I.getValue()))
354 Generator.insert(
I.getKey(), &
I.getValue());
361 Header.Version |= VARIANT_MASK_IR_PROF;
363 Header.Version |= VARIANT_MASK_CSIR_PROF;
364 if (
static_cast<bool>(ProfileKind &
366 Header.Version |= VARIANT_MASK_INSTR_ENTRY;
368 Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
370 Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
372 Header.Version |= VARIANT_MASK_MEMPROF;
376 Header.HashOffset = 0;
377 Header.MemProfOffset = 0;
378 Header.BinaryIdOffset = 0;
384 for (
int I = 0;
I <
N - 3;
I++)
410 for (
unsigned I = 0;
I < SummarySize /
sizeof(
uint64_t);
I++)
415 CSSummaryOffset = OS.
tell();
416 CSSummarySize = SummarySize /
sizeof(
uint64_t);
417 for (
unsigned I = 0;
I < CSSummarySize;
I++)
438 MemProfSectionStart = OS.
tell();
445 for (
const auto Id : Schema) {
449 auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
450 RecordWriter->Schema = &Schema;
452 RecordTableGenerator;
453 for (
auto &
I : MemProfRecordData) {
455 RecordTableGenerator.insert(
I.first,
I.second);
459 RecordTableGenerator.
Emit(OS.
OS, *RecordWriter);
463 auto FrameWriter = std::make_unique<memprof::FrameWriterTrait>();
466 for (
auto &
I : MemProfFrameData) {
468 FrameTableGenerator.insert(
I.first,
I.second);
471 uint64_t FrameTableOffset = FrameTableGenerator.
Emit(OS.
OS, *FrameWriter);
474 {MemProfSectionStart, &RecordTableOffset, 1},
475 {MemProfSectionStart +
sizeof(
uint64_t), &FramePayloadOffset, 1},
476 {MemProfSectionStart + 2 *
sizeof(
uint64_t), &FrameTableOffset, 1},
478 OS.
patch(PatchItems, 3);
493 BinaryIds.erase(
std::unique(BinaryIds.begin(), BinaryIds.end()),
496 for (
auto BI : BinaryIds) {
498 BinaryIdsSectionSize +=
sizeof(
uint64_t);
503 OS.
write(BinaryIdsSectionSize);
505 for (
auto BI : BinaryIds) {
510 for (
unsigned K = 0; K < BILen; K++)
514 for (
unsigned K = 0; K < PaddingSize; K++)
519 std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
523 std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
525 InfoObj->SummaryBuilder =
nullptr;
528 std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary =
nullptr;
531 std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
534 InfoObj->CSSummaryBuilder =
nullptr;
539 {HashTableStartFieldOffset, &HashTableStart, 1},
542 {MemProfSectionOffset, &MemProfSectionStart, 1},
544 {BinaryIdSectionOffset, &BinaryIdSectionStart, 1},
546 {SummaryOffset,
reinterpret_cast<uint64_t *
>(TheSummary.get()),
547 (
int)(SummarySize /
sizeof(
uint64_t))},
548 {CSSummaryOffset,
reinterpret_cast<uint64_t *
>(TheCSSummary.get()),
549 (
int)CSSummarySize}};
553 for (
const auto &
I : FunctionData)
554 for (
const auto &
F :
I.getValue())
564 return writeImpl(POS);
572 if (
Error E = writeImpl(POS))
579 #define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
584 for (
uint32_t VK = 0; VK <= IPVK_Last; VK++) {
585 uint32_t NS = Func.getNumValueSites(VK);
589 uint32_t ND = Func.getNumValueDataForSite(VK,
S);
590 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK,
S);
593 if ((VK != IPVK_IndirectCallTarget) && !SeenValues.
insert(VD[
I].Value).second)
606 OS <<
"# Func Hash:\n" << Hash <<
"\n";
607 OS <<
"# Num Counters:\n" << Func.Counts.size() <<
"\n";
608 OS <<
"# Counter Values:\n";
612 uint32_t NumValueKinds = Func.getNumValueKinds();
613 if (!NumValueKinds) {
618 OS <<
"# Num Value Kinds:\n" << Func.getNumValueKinds() <<
"\n";
619 for (
uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
620 uint32_t NS = Func.getNumValueSites(VK);
624 OS <<
"# NumValueSites:\n" << NS <<
"\n";
626 uint32_t ND = Func.getNumValueDataForSite(VK,
S);
628 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK,
S);
630 if (VK == IPVK_IndirectCallTarget)
632 << VD[
I].Count <<
"\n";
634 OS << VD[
I].Value <<
":" << VD[
I].Count <<
"\n";
645 OS <<
"# CSIR level Instrumentation Flag\n:csir\n";
647 OS <<
"# IR level Instrumentation Flag\n:ir\n";
649 if (
static_cast<bool>(ProfileKind &
651 OS <<
"# Always instrument the function entry block\n:entry_first\n";
655 using RecordType = std::pair<StringRef, FuncPair>;
658 for (
const auto &
I : FunctionData) {
659 if (shouldEncodeData(
I.getValue())) {
662 for (
const auto &Func :
I.getValue())
663 OrderedFuncData.push_back(std::make_pair(
I.getKey(), Func));
668 return std::tie(A.first, A.second.first) <
669 std::tie(
B.first,
B.second.first);
672 for (
const auto &record : OrderedFuncData) {
674 const FuncPair &Func = record.second;
678 for (
const auto &record : OrderedFuncData) {
679 const FuncPair &Func = record.second;
uint64_t tell() const
tell - Return the current offset within the file.
Error writeText(raw_fd_ostream &OS)
Write the profile in text format to OS.
This is an optimization pass for GlobalISel generic memory operations.
constexpr support::endianness Endianness
The endianness of all multi-byte encoded values in MessagePack.
@ FunctionEntryInstrumentation
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Error write(raw_fd_ostream &OS)
Write the profile to OS.
Generates an on disk hash table.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
A raw_ostream that writes to an std::string.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
uint64_t ComputeHash(StringRef K)
Helper object to track which of three possible relocation mechanisms are used for a particular value ...
static MemProfSchema getSchema()
static ErrorSuccess success()
Create a success value.
void overlap(InstrProfRecord &Other, OverlapStats &Overlap, OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff)
Compute the overlap between this InstrProfRecord and Other.
InstrProfSummaryBuilder * CSSummaryBuilder
support::endianness ValueProfDataEndianness
Adapter to write values to a stream in a particular byte order.
std::pair< iterator, bool > insert(const ValueT &V)
Profiling information for a single function.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
uint32_t getNumCounts() const
offset_type Emit(raw_ostream &Out)
Emit the table to Out, which must not be at offset 0.
static void writeRecordInText(StringRef Name, uint64_t Hash, const InstrProfRecord &Counters, InstrProfSymtab &Symtab, raw_fd_ostream &OS)
Write Record in text format to OS.
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
uint32_t getNumFunctions() const
uint64_t getMaxInternalCount() const
uint64_t getTotalCount() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
raw_ostream & write(unsigned char C)
This class implements an extremely fast bulk output stream that can only output to a stream.
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
uint64_t seek(uint64_t off)
Flushes the stream and repositions the underlying file descriptor position to the offset specified from the beginning of the file.
@ TotalBlockCount
The sum of all instrumented block counts.
static const char * ValueProfKindStr[]
uint64_t getMaxFunctionCount() const
void sort(IteratorTy Start, IteratorTy End)
An efficient, type-erasing, non-owning reference to a callable.
@ MaxFunctionCount
The maximal execution count among all functions.
void set(SummaryFieldKind K, uint64_t V)
const InstrProfWriter::ProfilingData *const data_type
void patch(PatchItem *P, int NItems)
const SummaryEntryVector & getDetailedSummary()
void setValueProfDataEndianness(support::endianness Endianness)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align)
InstrProfSummaryBuilder * SummaryBuilder
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, function_ref< void(Error)> Warn)
Add a memprof frame identified by the hash of the contents of the frame in FrameId.
void addMemProfRecord(const GlobalValue::GUID Id, const memprof::IndexedMemProfRecord &Record)
Add a memprof record for a function identified by its Id.
static void setSummary(IndexedInstrProf::Summary *TheSummary, ProfileSummary &PS)
InstrProfWriter(bool Sparse=false)
std::unique_ptr< Summary > allocSummary(uint32_t TotalSize)
@ TotalNumFunctions
The total number of functions instrumented.
static unsigned getSize(unsigned Kind)
void addRecord(NamedInstrProfRecord &&I, uint64_t Weight, function_ref< void(Error)> Warn)
Add function counts for the given function.
ProfOStream(raw_fd_ostream &FD)
void setEntry(uint32_t I, const ProfileSummaryEntry &E)
void addOneUnique(const CountSumOrPercent &UniqueFunc)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
StringRef - Represent a constant reference to a string, i.e.
void write(ArrayRef< value_type > Val)
static bool hasCSFlagInHash(uint64_t FuncHash)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
uint64_t NumCutoffEntries
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
void merge(const IndexedMemProfRecord &Other)
A raw_ostream that writes to a file descriptor.
void merge(InstrProfRecord &Other, uint64_t Weight, function_ref< void(instrprof_error)> Warn)
Merge the counts in Other into this one.
constexpr size_t size() const
size - Get the string size.
std::unique_ptr< MemoryBuffer > writeBuffer()
Write the profile, returning the raw data. For testing.
void writeByte(uint8_t V)
@ MaxInternalBlockCount
Max internal block count of the program (excluding entry blocks).
A symbol table used for function PGO name look-up with keys (such as pointers, md5hash values) to the...
Lightweight error class with error context and mandatory checking.
const InstrProfWriter::ProfilingData *const data_type_ref
Error validateRecord(const InstrProfRecord &Func)
@ MaxBlockCount
Max block count of the program.
support::endian::Writer LE
void addBinaryIds(ArrayRef< llvm::object::BuildID > BIs)
CountSumOrPercent Overlap
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N)
Error addFuncName(StringRef FuncName)
Update the symtab by adding FuncName to the table.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
std::optional< std::vector< StOtherPiece > > Other
void sortValueData()
Sort value profile data (per site) by count.
StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash)
Just like getFuncName, except that it will return a non-empty StringRef if the function is external t...
const std::string NameFilter
size_t size() const
size - Get the array size.
Reimplement select in terms of SEL *We would really like to support but we need to prove that the add doesn't need to overflow between the two bit chunks *Implement pre post increment support(e.g. PR935) *Implement smarter constant generation for binops with large immediates. A few ARMv6T2 ops should be pattern matched
void addRecord(const InstrProfRecord &)
static std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
@ TotalNumBlocks
Total number of instrumented blocks/edges.
static hash_value_type ComputeHash(key_type_ref K)
void setOutputSparse(bool Sparse)
static std::unique_ptr< MemoryBuffer > getMemBufferCopy(StringRef InputData, const Twine &BufferName="")
Open the specified memory range as a MemoryBuffer, copying the contents and taking ownership of it.
void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref< void(Error)> Warn)
Merge existing function counts from the given writer.
std::string & str()
Returns the string's reference.
uint64_t NumSummaryFields
static const ArrayRef< uint32_t > DefaultCutoffs
A vector of useful cutoff values for detailed summary.
LLVM Value Representation.
void addOneMismatch(const CountSumOrPercent &MismatchFunc)
std::vector< uint64_t > Counts
uint64_t getMaxCount() const
ProfOStream(raw_string_ostream &STR)
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap, OverlapStats &FuncLevelOverlap, const OverlapFuncFilters &FuncFilter)
void scale(uint64_t N, uint64_t D, function_ref< void(instrprof_error)> Warn)
Scale up profile counts (including value profile data) by a factor of (N / D).