Bug Summary

File:build/source/llvm/tools/llvm-profgen/ProfileGenerator.cpp
Warning:line 1113, column 5
Forming reference to null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ProfileGenerator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I tools/llvm-profgen -I /build/source/llvm/tools/llvm-profgen -I include -I /build/source/llvm/include -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670969321 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-12-14-010809-15973-1 -x c++ /build/source/llvm/tools/llvm-profgen/ProfileGenerator.cpp
1//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "ProfileGenerator.h"
9#include "ErrorHandling.h"
10#include "PerfReader.h"
11#include "ProfiledBinary.h"
12#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13#include "llvm/ProfileData/ProfileCommon.h"
14#include <algorithm>
15#include <float.h>
16#include <unordered_set>
17#include <utility>
18
19cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
20 cl::Required,
21 cl::desc("Output profile file"));
22static cl::alias OutputA("o", cl::desc("Alias for --output"),
23 cl::aliasopt(OutputFilename));
24
25static cl::opt<SampleProfileFormat> OutputFormat(
26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27 cl::values(
28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)"
}
,
29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding")llvm::cl::OptionEnumValue { "compbinary", int(SPF_Compact_Binary
), "Compact binary encoding" }
,
30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary),
"Extensible binary encoding" }
,
31 clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding"
}
,
32 clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
33 "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
));
34
35static cl::opt<bool> UseMD5(
36 "use-md5", cl::Hidden,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
39
40static cl::opt<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43
44static cl::opt<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
48
49static cl::opt<int32_t, true> RecursionCompression(
50 "compress-recursion",
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
53 cl::Hidden,
54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55
56static cl::opt<bool>
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
60
61static cl::opt<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
65 "profile."));
66
67static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
71
72static cl::opt<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
75 "depth limit."),
76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77
78static cl::opt<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(1000),
80 llvm::cl::desc(
81 "specify density threshold for hot functions (default: 1000)"),
82 llvm::cl::Optional);
83static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84 llvm::cl::desc("show profile density details"),
85 llvm::cl::Optional);
86
87static cl::opt<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(false),
89 llvm::cl::desc(
90 "Update total samples by accumulating all its body samples."),
91 llvm::cl::Optional);
92
93static cl::opt<bool> GenCSNestedProfile(
94 "gen-cs-nested-profile", cl::Hidden, cl::init(true),
95 cl::desc("Generate nested function profiles for CSSPGO"));
96
97using namespace llvm;
98using namespace sampleprof;
99
100namespace llvm {
101extern cl::opt<int> ProfileSummaryCutoffHot;
102extern cl::opt<bool> UseContextLessSummary;
103
104namespace sampleprof {
105
106// Initialize the MaxCompressionSize to -1 which means no size limit
107int32_t CSProfileGenerator::MaxCompressionSize = -1;
108
109int CSProfileGenerator::MaxContextDepth = -1;
110
111bool ProfileGeneratorBase::UseFSDiscriminator = false;
112
113std::unique_ptr<ProfileGeneratorBase>
114ProfileGeneratorBase::create(ProfiledBinary *Binary,
115 const ContextSampleCounterMap *SampleCounters,
116 bool ProfileIsCS) {
117 std::unique_ptr<ProfileGeneratorBase> Generator;
118 if (ProfileIsCS) {
119 if (Binary->useFSDiscriminator())
120 exitWithError("FS discriminator is not supported in CS profile.");
121 Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
122 } else {
123 Generator.reset(new ProfileGenerator(Binary, SampleCounters));
124 }
125 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
126 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
127
128 return Generator;
129}
130
131std::unique_ptr<ProfileGeneratorBase>
132ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
133 bool ProfileIsCS) {
134 std::unique_ptr<ProfileGeneratorBase> Generator;
135 if (ProfileIsCS) {
136 if (Binary->useFSDiscriminator())
137 exitWithError("FS discriminator is not supported in CS profile.");
138 Generator.reset(new CSProfileGenerator(Binary, Profiles));
139 } else {
140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141 }
142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144
145 return Generator;
146}
147
148void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149 SampleProfileMap &ProfileMap) {
150 // Populate profile symbol list if extended binary format is used.
151 ProfileSymbolList SymbolList;
152
153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154 Binary->populateSymbolListFromDWARF(SymbolList);
155 Writer->setProfileSymbolList(&SymbolList);
156 }
157
158 if (std::error_code EC = Writer->write(ProfileMap))
159 exitWithError(std::move(EC));
160}
161
162void ProfileGeneratorBase::write() {
163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164 if (std::error_code EC = WriterOrErr.getError())
165 exitWithError(EC, OutputFilename);
166
167 if (UseMD5) {
168 if (OutputFormat != SPF_Ext_Binary)
169 WithColor::warning() << "-use-md5 is ignored. Specify "
170 "--format=extbinary to enable it\n";
171 else
172 WriterOrErr.get()->setUseMD5();
173 }
174
175 write(std::move(WriterOrErr.get()), ProfileMap);
176}
177
178void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179 if (Density == 0.0)
180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181 "set too low. Please check your command.\n";
182 else if (Density < HotFunctionDensityThreshold)
183 WithColor::warning()
184 << "AutoFDO is estimated to optimize better with "
185 << format("%.1f", HotFunctionDensityThreshold / Density)
186 << "x more samples. Please consider increasing sampling rate or "
187 "profiling for longer duration to get more samples.\n";
188
189 if (ShowDensity)
190 outs() << "Minimum profile density for hot functions with top "
191 << format("%.2f",
192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193 10000)
194 << "% total samples: " << format("%.1f", Density) << "\n";
195}
196
197double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198 uint64_t HotCntThreshold) {
199 double Density = DBL_MAX1.7976931348623157e+308;
200 std::vector<const FunctionSamples *> HotFuncs;
201 for (auto &I : Profiles) {
202 auto &FuncSamples = I.second;
203 if (FuncSamples.getTotalSamples() < HotCntThreshold)
204 continue;
205 HotFuncs.emplace_back(&FuncSamples);
206 }
207
208 for (auto *FuncSamples : HotFuncs) {
209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210 if (!Func)
211 continue;
212 uint64_t FuncSize = Func->getFuncSize();
213 if (FuncSize == 0)
214 continue;
215 Density =
216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217 FuncSize);
218 }
219
220 return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density;
221}
222
223void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
224 const RangeSample &Ranges) {
225
226 /*
227 Regions may overlap with each other. Using the boundary info, find all
228 disjoint ranges and their sample count. BoundaryPoint contains the count
229 multiple samples begin/end at this points.
230
231 |<--100-->| Sample1
232 |<------200------>| Sample2
233 A B C
234
235 In the example above,
236 Sample1 begins at A, ends at B, its value is 100.
237 Sample2 beings at A, ends at C, its value is 200.
238 For A, BeginCount is the sum of sample begins at A, which is 300 and no
239 samples ends at A, so EndCount is 0.
240 Then boundary points A, B, and C with begin/end counts are:
241 A: (300, 0)
242 B: (0, 100)
243 C: (0, 200)
244 */
245 struct BoundaryPoint {
246 // Sum of sample counts beginning at this point
247 uint64_t BeginCount = UINT64_MAX(18446744073709551615UL);
248 // Sum of sample counts ending at this point
249 uint64_t EndCount = UINT64_MAX(18446744073709551615UL);
250 // Is the begin point of a zero range.
251 bool IsZeroRangeBegin = false;
252 // Is the end point of a zero range.
253 bool IsZeroRangeEnd = false;
254
255 void addBeginCount(uint64_t Count) {
256 if (BeginCount == UINT64_MAX(18446744073709551615UL))
257 BeginCount = 0;
258 BeginCount += Count;
259 }
260
261 void addEndCount(uint64_t Count) {
262 if (EndCount == UINT64_MAX(18446744073709551615UL))
263 EndCount = 0;
264 EndCount += Count;
265 }
266 };
267
268 /*
269 For the above example. With boundary points, follwing logic finds two
270 disjoint region of
271
272 [A,B]: 300
273 [B+1,C]: 200
274
275 If there is a boundary point that both begin and end, the point itself
276 becomes a separate disjoint region. For example, if we have original
277 ranges of
278
279 |<--- 100 --->|
280 |<--- 200 --->|
281 A B C
282
283 there are three boundary points with their begin/end counts of
284
285 A: (100, 0)
286 B: (200, 100)
287 C: (0, 200)
288
289 the disjoint ranges would be
290
291 [A, B-1]: 100
292 [B, B]: 300
293 [B+1, C]: 200.
294
295 Example for zero value range:
296
297 |<--- 100 --->|
298 |<--- 200 --->|
299 |<--------------- 0 ----------------->|
300 A B C D E F
301
302 [A, B-1] : 0
303 [B, C] : 100
304 [C+1, D-1]: 0
305 [D, E] : 200
306 [E+1, F] : 0
307 */
308 std::map<uint64_t, BoundaryPoint> Boundaries;
309
310 for (const auto &Item : Ranges) {
311 assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 312, __extension__
__PRETTY_FUNCTION__))
312 "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 312, __extension__
__PRETTY_FUNCTION__))
;
313 auto &BeginPoint = Boundaries[Item.first.first];
314 auto &EndPoint = Boundaries[Item.first.second];
315 uint64_t Count = Item.second;
316
317 BeginPoint.addBeginCount(Count);
318 EndPoint.addEndCount(Count);
319 if (Count == 0) {
320 BeginPoint.IsZeroRangeBegin = true;
321 EndPoint.IsZeroRangeEnd = true;
322 }
323 }
324
325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
326 // and the next valid address
327 uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL);
328 int ZeroRangeDepth = 0;
329 uint64_t Count = 0;
330 for (const auto &Item : Boundaries) {
331 uint64_t Address = Item.first;
332 const BoundaryPoint &Point = Item.second;
333 if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) {
334 if (BeginAddress != UINT64_MAX(18446744073709551615UL))
335 DisjointRanges[{BeginAddress, Address - 1}] = Count;
336 Count += Point.BeginCount;
337 BeginAddress = Address;
338 ZeroRangeDepth += Point.IsZeroRangeBegin;
339 }
340 if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) {
341 assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 342, __extension__
__PRETTY_FUNCTION__))
342 "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 342, __extension__
__PRETTY_FUNCTION__))
;
343 DisjointRanges[{BeginAddress, Address}] = Count;
344 assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount &&
"Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 344, __extension__
__PRETTY_FUNCTION__))
;
345 Count -= Point.EndCount;
346 BeginAddress = Address + 1;
347 ZeroRangeDepth -= Point.IsZeroRangeEnd;
348 // If the remaining count is zero and it's no longer in a zero range, this
349 // means we consume all the ranges before, thus mark BeginAddress as
350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
351 // [<---- 10 ---->]
352 // [<---- 20 ---->]
353 // A B C D
354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
355 // have the [B+1, C-1] zero range.
356 if (Count == 0 && ZeroRangeDepth == 0)
357 BeginAddress = UINT64_MAX(18446744073709551615UL);
358 }
359 }
360}
361
362void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364 uint64_t Count) {
365 // Use the maximum count of samples with same line location
366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367
368 // Use duplication factor to compensated for loop unroll/vectorization.
369 // Note that this is only needed when we're taking MAX of the counts at
370 // the location instead of SUM.
371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372
373 ErrorOr<uint64_t> R =
374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375
376 uint64_t PreviousCount = R ? R.get() : 0;
377 if (PreviousCount <= Count) {
378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379 Count - PreviousCount);
380 }
381}
382
383void ProfileGeneratorBase::updateTotalSamples() {
384 for (auto &Item : ProfileMap) {
385 FunctionSamples &FunctionProfile = Item.second;
386 FunctionProfile.updateTotalSamples();
387 }
388}
389
390void ProfileGeneratorBase::updateCallsiteSamples() {
391 for (auto &Item : ProfileMap) {
392 FunctionSamples &FunctionProfile = Item.second;
393 FunctionProfile.updateCallsiteSamples();
394 }
395}
396
397void ProfileGeneratorBase::updateFunctionSamples() {
398 updateCallsiteSamples();
399
400 if (UpdateTotalSamples)
401 updateTotalSamples();
402}
403
404void ProfileGeneratorBase::collectProfiledFunctions() {
405 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
406 if (collectFunctionsFromRawProfile(ProfiledFunctions))
407 Binary->setProfiledFunctions(ProfiledFunctions);
408 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
409 Binary->setProfiledFunctions(ProfiledFunctions);
410 else
411 llvm_unreachable("Unsupported input profile")::llvm::llvm_unreachable_internal("Unsupported input profile"
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 411)
;
412}
413
414bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
415 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
416 if (!SampleCounters)
417 return false;
418 // Go through all the stacks, ranges and branches in sample counters, use
419 // the start of the range to look up the function it belongs and record the
420 // function.
421 for (const auto &CI : *SampleCounters) {
422 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
423 for (auto StackAddr : CtxKey->Context) {
424 if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
425 ProfiledFunctions.insert(FRange->Func);
426 }
427 }
428
429 for (auto Item : CI.second.RangeCounter) {
430 uint64_t StartAddress = Item.first.first;
431 if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
432 ProfiledFunctions.insert(FRange->Func);
433 }
434
435 for (auto Item : CI.second.BranchCounter) {
436 uint64_t SourceAddress = Item.first.first;
437 uint64_t TargetAddress = Item.first.second;
438 if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
439 ProfiledFunctions.insert(FRange->Func);
440 if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
441 ProfiledFunctions.insert(FRange->Func);
442 }
443 }
444 return true;
445}
446
447bool ProfileGenerator::collectFunctionsFromLLVMProfile(
448 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
449 for (const auto &FS : ProfileMap) {
450 if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
451 ProfiledFunctions.insert(Func);
452 }
453 return true;
454}
455
456bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
457 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
458 for (auto *Node : ContextTracker) {
459 if (!Node->getFuncName().empty())
460 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
461 ProfiledFunctions.insert(Func);
462 }
463 return true;
464}
465
466FunctionSamples &
467ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
468 SampleContext Context(FuncName);
469 auto Ret = ProfileMap.emplace(Context, FunctionSamples());
470 if (Ret.second) {
471 FunctionSamples &FProfile = Ret.first->second;
472 FProfile.setContext(Context);
473 }
474 return Ret.first->second;
475}
476
477void ProfileGenerator::generateProfile() {
478 collectProfiledFunctions();
479
480 if (Binary->usePseudoProbes())
481 Binary->decodePseudoProbe();
482
483 if (SampleCounters) {
484 if (Binary->usePseudoProbes()) {
485 generateProbeBasedProfile();
486 } else {
487 generateLineNumBasedProfile();
488 }
489 }
490
491 postProcessProfiles();
492}
493
494void ProfileGenerator::postProcessProfiles() {
495 computeSummaryAndThreshold(ProfileMap);
496 trimColdProfiles(ProfileMap, ColdCountThreshold);
497 calculateAndShowDensity(ProfileMap);
498}
499
500void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
501 uint64_t ColdCntThreshold) {
502 if (!TrimColdProfile)
503 return;
504
505 // Move cold profiles into a tmp container.
506 std::vector<SampleContext> ColdProfiles;
507 for (const auto &I : ProfileMap) {
508 if (I.second.getTotalSamples() < ColdCntThreshold)
509 ColdProfiles.emplace_back(I.first);
510 }
511
512 // Remove the cold profile from ProfileMap.
513 for (const auto &I : ColdProfiles)
514 ProfileMap.erase(I);
515}
516
517void ProfileGenerator::generateLineNumBasedProfile() {
518 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 519, __extension__
__PRETTY_FUNCTION__))
519 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 519, __extension__
__PRETTY_FUNCTION__))
;
520 const SampleCounter &SC = SampleCounters->begin()->second;
521 // Fill in function body samples
522 populateBodySamplesForAllFunctions(SC.RangeCounter);
523 // Fill in boundary sample counts as well as call site samples for calls
524 populateBoundarySamplesForAllFunctions(SC.BranchCounter);
525
526 updateFunctionSamples();
527}
528
529void ProfileGenerator::generateProbeBasedProfile() {
530 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 531, __extension__
__PRETTY_FUNCTION__))
531 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 531, __extension__
__PRETTY_FUNCTION__))
;
532 // Enable pseudo probe functionalities in SampleProf
533 FunctionSamples::ProfileIsProbeBased = true;
534 const SampleCounter &SC = SampleCounters->begin()->second;
535 // Fill in function body samples
536 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
537 // Fill in boundary sample counts as well as call site samples for calls
538 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
539
540 updateFunctionSamples();
541}
542
543void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
544 const RangeSample &RangeCounter) {
545 ProbeCounterMap ProbeCounter;
546 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
547 // inside extractProbesFromRange.
548 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
549 false);
550
551 for (const auto &PI : ProbeCounter) {
552 const MCDecodedPseudoProbe *Probe = PI.first;
553 uint64_t Count = PI.second;
554 SampleContextFrameVector FrameVec;
555 Binary->getInlineContextForProbe(Probe, FrameVec, true);
556 FunctionSamples &FunctionProfile =
557 getLeafProfileAndAddTotalSamples(FrameVec, Count);
558 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
559 if (Probe->isEntry())
560 FunctionProfile.addHeadSamples(Count);
561 }
562}
563
564void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
565 const BranchSample &BranchCounters) {
566 for (const auto &Entry : BranchCounters) {
567 uint64_t SourceAddress = Entry.first.first;
568 uint64_t TargetAddress = Entry.first.second;
569 uint64_t Count = Entry.second;
570 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 570, __extension__
__PRETTY_FUNCTION__))
;
571
572 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
573 if (CalleeName.size() == 0)
574 continue;
575
576 const MCDecodedPseudoProbe *CallProbe =
577 Binary->getCallProbeForAddr(SourceAddress);
578 if (CallProbe == nullptr)
579 continue;
580
581 // Record called target sample and its count.
582 SampleContextFrameVector FrameVec;
583 Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
584
585 if (!FrameVec.empty()) {
586 FunctionSamples &FunctionProfile =
587 getLeafProfileAndAddTotalSamples(FrameVec, 0);
588 FunctionProfile.addCalledTargetSamples(
589 FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
590 }
591 }
592}
593
594FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
595 const SampleContextFrameVector &FrameVec, uint64_t Count) {
596 // Get top level profile
597 FunctionSamples *FunctionProfile =
598 &getTopLevelFunctionProfile(FrameVec[0].FuncName);
599 FunctionProfile->addTotalSamples(Count);
600 if (Binary->usePseudoProbes()) {
601 const auto *FuncDesc = Binary->getFuncDescForGUID(
602 Function::getGUID(FunctionProfile->getName()));
603 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
604 }
605
606 for (size_t I = 1; I < FrameVec.size(); I++) {
607 LineLocation Callsite(
608 FrameVec[I - 1].Location.LineOffset,
609 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
610 FunctionSamplesMap &SamplesMap =
611 FunctionProfile->functionSamplesAt(Callsite);
612 auto Ret =
613 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
614 if (Ret.second) {
615 SampleContext Context(FrameVec[I].FuncName);
616 Ret.first->second.setContext(Context);
617 }
618 FunctionProfile = &Ret.first->second;
619 FunctionProfile->addTotalSamples(Count);
620 if (Binary->usePseudoProbes()) {
621 const auto *FuncDesc = Binary->getFuncDescForGUID(
622 Function::getGUID(FunctionProfile->getName()));
623 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
624 }
625 }
626
627 return *FunctionProfile;
628}
629
630RangeSample
631ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
632 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
633 if (FillZeroForAllFuncs) {
634 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
635 for (auto &R : FuncI.second.Ranges) {
636 Ranges[{R.first, R.second - 1}] += 0;
637 }
638 }
639 } else {
640 // For each range, we search for all ranges of the function it belongs to
641 // and initialize it with zero count, so it remains zero if doesn't hit any
642 // samples. This is to be consistent with compiler that interpret zero count
643 // as unexecuted(cold).
644 for (const auto &I : RangeCounter) {
645 uint64_t StartAddress = I.first.first;
646 for (const auto &Range : Binary->getRanges(StartAddress))
647 Ranges[{Range.first, Range.second - 1}] += 0;
648 }
649 }
650 RangeSample DisjointRanges;
651 findDisjointRanges(DisjointRanges, Ranges);
652 return DisjointRanges;
653}
654
655void ProfileGenerator::populateBodySamplesForAllFunctions(
656 const RangeSample &RangeCounter) {
657 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
658 uint64_t RangeBegin = Range.first.first;
659 uint64_t RangeEnd = Range.first.second;
660 uint64_t Count = Range.second;
661
662 InstructionPointer IP(Binary, RangeBegin, true);
663 // Disjoint ranges may have range in the middle of two instr,
664 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
665 // can be Addr1+1 to Addr2-1. We should ignore such range.
666 if (IP.Address > RangeEnd)
667 continue;
668
669 do {
670 const SampleContextFrameVector FrameVec =
671 Binary->getFrameLocationStack(IP.Address);
672 if (!FrameVec.empty()) {
673 // FIXME: As accumulating total count per instruction caused some
674 // regression, we changed to accumulate total count per byte as a
675 // workaround. Tuning hotness threshold on the compiler side might be
676 // necessary in the future.
677 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
678 FrameVec, Count * Binary->getInstSize(IP.Address));
679 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
680 Count);
681 }
682 } while (IP.advance() && IP.Address <= RangeEnd);
683 }
684}
685
686StringRef
687ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
688 // Get the function range by branch target if it's a call branch.
689 auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
690
691 // We won't accumulate sample count for a range whose start is not the real
692 // function entry such as outlined function or inner labels.
693 if (!FRange || !FRange->IsFuncEntry)
694 return StringRef();
695
696 return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
697}
698
699void ProfileGenerator::populateBoundarySamplesForAllFunctions(
700 const BranchSample &BranchCounters) {
701 for (const auto &Entry : BranchCounters) {
702 uint64_t SourceAddress = Entry.first.first;
703 uint64_t TargetAddress = Entry.first.second;
704 uint64_t Count = Entry.second;
705 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 705, __extension__
__PRETTY_FUNCTION__))
;
706
707 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
708 if (CalleeName.size() == 0)
709 continue;
710 // Record called target sample and its count.
711 const SampleContextFrameVector &FrameVec =
712 Binary->getCachedFrameLocationStack(SourceAddress);
713 if (!FrameVec.empty()) {
714 FunctionSamples &FunctionProfile =
715 getLeafProfileAndAddTotalSamples(FrameVec, 0);
716 FunctionProfile.addCalledTargetSamples(
717 FrameVec.back().Location.LineOffset,
718 getBaseDiscriminator(FrameVec.back().Location.Discriminator),
719 CalleeName, Count);
720 }
721 // Add head samples for callee.
722 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
723 CalleeProfile.addHeadSamples(Count);
724 }
725}
726
727void ProfileGeneratorBase::calculateAndShowDensity(
728 const SampleProfileMap &Profiles) {
729 double Density = calculateDensity(Profiles, HotCountThreshold);
730 showDensitySuggestion(Density);
731}
732
733FunctionSamples *
734CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
735 bool WasLeafInlined) {
736 FunctionSamples *FProfile = ContextNode->getFunctionSamples();
737 if (!FProfile) {
738 FSamplesList.emplace_back();
739 FProfile = &FSamplesList.back();
740 FProfile->setName(ContextNode->getFuncName());
741 ContextNode->setFunctionSamples(FProfile);
742 }
743 // Update ContextWasInlined attribute for existing contexts.
744 // The current function can be called in two ways:
745 // - when processing a probe of the current frame
746 // - when processing the entry probe of an inlinee's frame, which
747 // is then used to update the callsite count of the current frame.
748 // The two can happen in any order, hence here we are making sure
749 // `ContextWasInlined` is always set as expected.
750 // TODO: Note that the former does not always happen if no probes of the
751 // current frame has samples, and if the latter happens, we could lose the
752 // attribute. This should be fixed.
753 if (WasLeafInlined)
754 FProfile->getContext().setAttribute(ContextWasInlined);
755 return FProfile;
756}
757
758ContextTrieNode *
759CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
760 bool WasLeafInlined) {
761 ContextTrieNode *ContextNode =
762 ContextTracker.getOrCreateContextPath(Context, true);
763 getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
764 return ContextNode;
765}
766
767void CSProfileGenerator::generateProfile() {
768 FunctionSamples::ProfileIsCS = true;
769
770 collectProfiledFunctions();
771
772 if (Binary->usePseudoProbes())
1
Assuming the condition is true
2
Taking true branch
773 Binary->decodePseudoProbe();
774
775 if (SampleCounters) {
3
Assuming field 'SampleCounters' is non-null
4
Taking true branch
776 if (Binary->usePseudoProbes()) {
5
Assuming the condition is true
6
Taking true branch
777 generateProbeBasedProfile();
7
Calling 'CSProfileGenerator::generateProbeBasedProfile'
778 } else {
779 generateLineNumBasedProfile();
780 }
781 }
782
783 if (Binary->getTrackFuncContextSize())
784 computeSizeForProfiledFunctions();
785
786 postProcessProfiles();
787}
788
789void CSProfileGenerator::computeSizeForProfiledFunctions() {
790 for (auto *Func : Binary->getProfiledFunctions())
791 Binary->computeInlinedContextSizeForFunc(Func);
792
793 // Flush the symbolizer to save memory.
794 Binary->flushSymbolizer();
795}
796
797void CSProfileGenerator::updateFunctionSamples() {
798 for (auto *Node : ContextTracker) {
799 FunctionSamples *FSamples = Node->getFunctionSamples();
800 if (FSamples) {
801 if (UpdateTotalSamples)
802 FSamples->updateTotalSamples();
803 FSamples->updateCallsiteSamples();
804 }
805 }
806}
807
808void CSProfileGenerator::generateLineNumBasedProfile() {
809 for (const auto &CI : *SampleCounters) {
810 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
811
812 ContextTrieNode *ContextNode = &getRootContext();
813 // Sample context will be empty if the jump is an external-to-internal call
814 // pattern, the head samples should be added for the internal function.
815 if (!CtxKey->Context.empty()) {
816 // Get or create function profile for the range
817 ContextNode =
818 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
819 // Fill in function body samples
820 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
821 CI.second.RangeCounter);
822 }
823 // Fill in boundary sample counts as well as call site samples for calls
824 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
825 }
826 // Fill in call site value sample for inlined calls and also use context to
827 // infer missing samples. Since we don't have call count for inlined
828 // functions, we estimate it from inlinee's profile using the entry of the
829 // body sample.
830 populateInferredFunctionSamples(getRootContext());
831
832 updateFunctionSamples();
833}
834
835void CSProfileGenerator::populateBodySamplesForFunction(
836 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
837 // Compute disjoint ranges first, so we can use MAX
838 // for calculating count for each location.
839 RangeSample Ranges;
840 findDisjointRanges(Ranges, RangeCounter);
841 for (const auto &Range : Ranges) {
842 uint64_t RangeBegin = Range.first.first;
843 uint64_t RangeEnd = Range.first.second;
844 uint64_t Count = Range.second;
845 // Disjoint ranges have introduce zero-filled gap that
846 // doesn't belong to current context, filter them out.
847 if (Count == 0)
848 continue;
849
850 InstructionPointer IP(Binary, RangeBegin, true);
851 // Disjoint ranges may have range in the middle of two instr,
852 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
853 // can be Addr1+1 to Addr2-1. We should ignore such range.
854 if (IP.Address > RangeEnd)
855 continue;
856
857 do {
858 auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
859 if (LeafLoc) {
860 // Recording body sample for this specific context
861 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
862 FunctionProfile.addTotalSamples(Count);
863 }
864 } while (IP.advance() && IP.Address <= RangeEnd);
865 }
866}
867
868void CSProfileGenerator::populateBoundarySamplesForFunction(
869 ContextTrieNode *Node, const BranchSample &BranchCounters) {
870
871 for (const auto &Entry : BranchCounters) {
872 uint64_t SourceAddress = Entry.first.first;
873 uint64_t TargetAddress = Entry.first.second;
874 uint64_t Count = Entry.second;
875 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 875, __extension__
__PRETTY_FUNCTION__))
;
876
877 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
878 if (CalleeName.size() == 0)
879 continue;
880
881 ContextTrieNode *CallerNode = Node;
882 LineLocation CalleeCallSite(0, 0);
883 if (CallerNode != &getRootContext()) {
884 // Record called target sample and its count
885 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
886 if (LeafLoc) {
887 CallerNode->getFunctionSamples()->addCalledTargetSamples(
888 LeafLoc->Location.LineOffset,
889 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
890 Count);
891 // Record head sample for called target(callee)
892 CalleeCallSite = LeafLoc->Location;
893 }
894 }
895
896 ContextTrieNode *CalleeNode =
897 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
898 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
899 CalleeProfile->addHeadSamples(Count);
900 }
901}
902
903void CSProfileGenerator::populateInferredFunctionSamples(
904 ContextTrieNode &Node) {
905 // There is no call jmp sample between the inliner and inlinee, we need to use
906 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
907 // sample depends on child(inlinee)'s sample, so traverse the tree in
908 // post-order.
909 for (auto &It : Node.getAllChildContext())
910 populateInferredFunctionSamples(It.second);
911
912 FunctionSamples *CalleeProfile = Node.getFunctionSamples();
913 if (!CalleeProfile)
914 return;
915 // If we already have head sample counts, we must have value profile
916 // for call sites added already. Skip to avoid double counting.
917 if (CalleeProfile->getHeadSamples())
918 return;
919 ContextTrieNode *CallerNode = Node.getParentContext();
920 // If we don't have context, nothing to do for caller's call site.
921 // This could happen for entry point function.
922 if (CallerNode == &getRootContext())
923 return;
924
925 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
926 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
927 // Since we don't have call count for inlined functions, we
928 // estimate it from inlinee's profile using entry body sample.
929 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
930 // If we don't have samples with location, use 1 to indicate live.
931 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
932 EstimatedCallCount = 1;
933 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
934 CallerLeafFrameLoc.Discriminator,
935 Node.getFuncName(), EstimatedCallCount);
936 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
937 CallerLeafFrameLoc.Discriminator,
938 EstimatedCallCount);
939 CallerProfile.addTotalSamples(EstimatedCallCount);
940}
941
942void CSProfileGenerator::convertToProfileMap(
943 ContextTrieNode &Node, SampleContextFrameVector &Context) {
944 FunctionSamples *FProfile = Node.getFunctionSamples();
945 if (FProfile) {
946 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
947 // Save the new context for future references.
948 SampleContextFrames NewContext = *Contexts.insert(Context).first;
949 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
950 FunctionSamples &NewProfile = Ret.first->second;
951 NewProfile.getContext().setContext(NewContext);
952 Context.pop_back();
953 }
954
955 for (auto &It : Node.getAllChildContext()) {
956 ContextTrieNode &ChildNode = It.second;
957 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
958 convertToProfileMap(ChildNode, Context);
959 Context.pop_back();
960 }
961}
962
963void CSProfileGenerator::convertToProfileMap() {
964 assert(ProfileMap.empty() &&(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 965, __extension__
__PRETTY_FUNCTION__))
965 "ProfileMap should be empty before converting from the trie")(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 965, __extension__
__PRETTY_FUNCTION__))
;
966 assert(IsProfileValidOnTrie &&(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 967, __extension__
__PRETTY_FUNCTION__))
967 "Do not convert the trie twice, it's already destroyed")(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 967, __extension__
__PRETTY_FUNCTION__))
;
968
969 SampleContextFrameVector Context;
970 for (auto &It : getRootContext().getAllChildContext())
971 convertToProfileMap(It.second, Context);
972
973 IsProfileValidOnTrie = false;
974}
975
976void CSProfileGenerator::postProcessProfiles() {
977 // Compute hot/cold threshold based on profile. This will be used for cold
978 // context profile merging/trimming.
979 computeSummaryAndThreshold();
980
981 // Run global pre-inliner to adjust/merge context profile based on estimated
982 // inline decisions.
983 if (EnableCSPreInliner) {
984 ContextTracker.populateFuncToCtxtMap();
985 CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
986 // Turn off the profile merger by default unless it is explicitly enabled.
987 if (!CSProfMergeColdContext.getNumOccurrences())
988 CSProfMergeColdContext = false;
989 }
990
991 convertToProfileMap();
992
993 // Trim and merge cold context profile using cold threshold above.
994 if (TrimColdProfile || CSProfMergeColdContext) {
995 SampleContextTrimmer(ProfileMap)
996 .trimAndMergeColdContextProfiles(
997 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
998 CSProfMaxColdContextDepth, EnableCSPreInliner);
999 }
1000
1001 // Merge function samples of CS profile to calculate profile density.
1002 sampleprof::SampleProfileMap ContextLessProfiles;
1003 for (const auto &I : ProfileMap) {
1004 ContextLessProfiles[I.second.getName()].merge(I.second);
1005 }
1006
1007 calculateAndShowDensity(ContextLessProfiles);
1008 if (GenCSNestedProfile) {
1009 CSProfileConverter CSConverter(ProfileMap);
1010 CSConverter.convertProfiles();
1011 FunctionSamples::ProfileIsCS = false;
1012 }
1013}
1014
1015void ProfileGeneratorBase::computeSummaryAndThreshold(
1016 SampleProfileMap &Profiles) {
1017 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1018 Summary = Builder.computeSummaryForProfiles(Profiles);
1019 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1020 (Summary->getDetailedSummary()));
1021 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1022 (Summary->getDetailedSummary()));
1023}
1024
1025void CSProfileGenerator::computeSummaryAndThreshold() {
1026 // Always merge and use context-less profile map to compute summary.
1027 SampleProfileMap ContextLessProfiles;
1028 ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1029
1030 // Set the flag below to avoid merging the profile again in
1031 // computeSummaryAndThreshold
1032 FunctionSamples::ProfileIsCS = false;
1033 assert((static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1036, __extension__
__PRETTY_FUNCTION__))
1034 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1036, __extension__
__PRETTY_FUNCTION__))
1035 "Don't set --profile-summary-contextless to false for profile "(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1036, __extension__
__PRETTY_FUNCTION__))
1036 "generation")(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1036, __extension__
__PRETTY_FUNCTION__))
;
1037 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1038 // Recover the old value.
1039 FunctionSamples::ProfileIsCS = true;
1040}
1041
1042void ProfileGeneratorBase::extractProbesFromRange(
1043 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1044 bool FindDisjointRanges) {
1045 const RangeSample *PRanges = &RangeCounter;
1046 RangeSample Ranges;
1047 if (FindDisjointRanges) {
1048 findDisjointRanges(Ranges, RangeCounter);
1049 PRanges = &Ranges;
1050 }
1051
1052 for (const auto &Range : *PRanges) {
1053 uint64_t RangeBegin = Range.first.first;
1054 uint64_t RangeEnd = Range.first.second;
1055 uint64_t Count = Range.second;
1056
1057 InstructionPointer IP(Binary, RangeBegin, true);
1058 // Disjoint ranges may have range in the middle of two instr,
1059 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1060 // can be Addr1+1 to Addr2-1. We should ignore such range.
1061 if (IP.Address > RangeEnd)
1062 continue;
1063
1064 do {
1065 const AddressProbesMap &Address2ProbesMap =
1066 Binary->getAddress2ProbesMap();
1067 auto It = Address2ProbesMap.find(IP.Address);
1068 if (It != Address2ProbesMap.end()) {
1069 for (const auto &Probe : It->second) {
1070 ProbeCounter[&Probe] += Count;
1071 }
1072 }
1073 } while (IP.advance() && IP.Address <= RangeEnd);
1074 }
1075}
1076
1077static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1078 const SmallVectorImpl<uint64_t> &AddrVec,
1079 ProfiledBinary *Binary) {
1080 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1081 for (auto Address : reverse(AddrVec)) {
1082 const MCDecodedPseudoProbe *CallProbe =
1083 Binary->getCallProbeForAddr(Address);
1084 // These could be the cases when a probe is not found at a calliste. Cutting
1085 // off the context from here since the inliner will not know how to consume
1086 // a context with unknown callsites.
1087 // 1. for functions that are not sampled when
1088 // --decode-probe-for-profiled-functions-only is on.
1089 // 2. for a merged callsite. Callsite merging may cause the loss of original
1090 // probe IDs.
1091 // 3. for an external callsite.
1092 if (!CallProbe)
1093 break;
1094 Probes.push_back(CallProbe);
1095 }
1096
1097 std::reverse(Probes.begin(), Probes.end());
1098
1099 // Extract context stack for reusing, leaf context stack will be added
1100 // compressed while looking up function profile.
1101 for (const auto *P : Probes) {
1102 Binary->getInlineContextForProbe(P, ContextStack, true);
1103 }
1104}
1105
1106void CSProfileGenerator::generateProbeBasedProfile() {
1107 // Enable pseudo probe functionalities in SampleProf
1108 FunctionSamples::ProfileIsProbeBased = true;
1109 for (const auto &CI : *SampleCounters) {
1110 const AddrBasedCtxKey *CtxKey =
9
'CtxKey' initialized to a null pointer value
1111 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
8
Assuming the object is not a 'CastReturnType'
1112 SampleContextFrameVector ContextStack;
1113 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
10
Forming reference to null pointer
1114 // Fill in function body samples from probes, also infer caller's samples
1115 // from callee's probe
1116 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1117 // Fill in boundary samples for a call probe
1118 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1119 }
1120}
1121
1122void CSProfileGenerator::populateBodySamplesWithProbes(
1123 const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1124 ProbeCounterMap ProbeCounter;
1125 // Extract the top frame probes by looking up each address among the range in
1126 // the Address2ProbeMap
1127 extractProbesFromRange(RangeCounter, ProbeCounter);
1128 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1129 std::unordered_set<FunctionSamples *>>
1130 FrameSamples;
1131 for (const auto &PI : ProbeCounter) {
1132 const MCDecodedPseudoProbe *Probe = PI.first;
1133 uint64_t Count = PI.second;
1134 // Disjoint ranges have introduce zero-filled gap that
1135 // doesn't belong to current context, filter them out.
1136 if (!Probe->isBlock() || Count == 0)
1137 continue;
1138
1139 ContextTrieNode *ContextNode =
1140 getContextNodeForLeafProbe(ContextStack, Probe);
1141 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1142 // Record the current frame and FunctionProfile whenever samples are
1143 // collected for non-danglie probes. This is for reporting all of the
1144 // zero count probes of the frame later.
1145 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1146 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1147 FunctionProfile.addTotalSamples(Count);
1148 if (Probe->isEntry()) {
1149 FunctionProfile.addHeadSamples(Count);
1150 // Look up for the caller's function profile
1151 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1152 ContextTrieNode *CallerNode = ContextNode->getParentContext();
1153 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1154 // Since the context id will be compressed, we have to use callee's
1155 // context id to infer caller's context id to ensure they share the
1156 // same context prefix.
1157 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1158 assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1159, __extension__
__PRETTY_FUNCTION__))
1159 "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1159, __extension__
__PRETTY_FUNCTION__))
;
1160 FunctionSamples &CallerProfile =
1161 *getOrCreateFunctionSamples(CallerNode);
1162 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1163 CallerProfile.addBodySamples(CallerIndex, 0, Count);
1164 CallerProfile.addTotalSamples(Count);
1165 CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1166 ContextNode->getFuncName(), Count);
1167 }
1168 }
1169 }
1170
1171 // Assign zero count for remaining probes without sample hits to
1172 // differentiate from probes optimized away, of which the counts are unknown
1173 // and will be inferred by the compiler.
1174 for (auto &I : FrameSamples) {
1175 for (auto *FunctionProfile : I.second) {
1176 for (auto *Probe : I.first->getProbes()) {
1177 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1178 }
1179 }
1180 }
1181}
1182
1183void CSProfileGenerator::populateBoundarySamplesWithProbes(
1184 const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1185 for (const auto &BI : BranchCounter) {
1186 uint64_t SourceAddress = BI.first.first;
1187 uint64_t TargetAddress = BI.first.second;
1188 uint64_t Count = BI.second;
1189 const MCDecodedPseudoProbe *CallProbe =
1190 Binary->getCallProbeForAddr(SourceAddress);
1191 if (CallProbe == nullptr)
1192 continue;
1193 FunctionSamples &FunctionProfile =
1194 getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1195 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1196 FunctionProfile.addTotalSamples(Count);
1197 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1198 if (CalleeName.size() == 0)
1199 continue;
1200 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1201 Count);
1202 }
1203}
1204
1205ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1206 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1207
1208 // Explicitly copy the context for appending the leaf context
1209 SampleContextFrameVector NewContextStack(ContextStack.begin(),
1210 ContextStack.end());
1211 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1212 // For leaf inlined context with the top frame, we should strip off the top
1213 // frame's probe id, like:
1214 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1215 auto LeafFrame = NewContextStack.back();
1216 LeafFrame.Location = LineLocation(0, 0);
1217 NewContextStack.pop_back();
1218 // Compress the context string except for the leaf frame
1219 CSProfileGenerator::compressRecursionContext(NewContextStack);
1220 CSProfileGenerator::trimContext(NewContextStack);
1221 NewContextStack.push_back(LeafFrame);
1222
1223 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1224 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1225 ContextTrieNode *ContextNode =
1226 getOrCreateContextNode(NewContextStack, WasLeafInlined);
1227 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1228 return ContextNode;
1229}
1230
1231FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1232 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1233 return *getContextNodeForLeafProbe(ContextStack, LeafProbe)
1234 ->getFunctionSamples();
1235}
1236
1237} // end namespace sampleprof
1238} // end namespace llvm