Bug Summary

File:build/source/llvm/tools/llvm-profgen/ProfileGenerator.cpp
Warning:line 1228, column 41
Forming reference to null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ProfileGenerator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/llvm-profgen -I /build/source/llvm/tools/llvm-profgen -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/tools/llvm-profgen/ProfileGenerator.cpp
1//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "ProfileGenerator.h"
9#include "ErrorHandling.h"
10#include "MissingFrameInferrer.h"
11#include "PerfReader.h"
12#include "ProfiledBinary.h"
13#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14#include "llvm/ProfileData/ProfileCommon.h"
15#include <algorithm>
16#include <float.h>
17#include <unordered_set>
18#include <utility>
19
20cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21 cl::Required,
22 cl::desc("Output profile file"));
23static cl::alias OutputA("o", cl::desc("Alias for --output"),
24 cl::aliasopt(OutputFilename));
25
26static cl::opt<SampleProfileFormat> OutputFormat(
27 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
28 cl::values(
29 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)"
}
,
30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary),
"Extensible binary encoding" }
,
31 clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding"
}
,
32 clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
33 "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
));
34
35static cl::opt<bool> UseMD5(
36 "use-md5", cl::Hidden,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
39
40static cl::opt<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43
44static cl::opt<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
48
49static cl::opt<int32_t, true> RecursionCompression(
50 "compress-recursion",
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
53 cl::Hidden,
54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55
56static cl::opt<bool>
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
60
61static cl::opt<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
65 "profile."));
66
67static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
71
72static cl::opt<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
75 "depth limit."),
76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77
78static cl::opt<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(1000),
80 llvm::cl::desc(
81 "specify density threshold for hot functions (default: 1000)"),
82 llvm::cl::Optional);
83static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84 llvm::cl::desc("show profile density details"),
85 llvm::cl::Optional);
86
87static cl::opt<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(false),
89 llvm::cl::desc(
90 "Update total samples by accumulating all its body samples."),
91 llvm::cl::Optional);
92
93static cl::opt<bool> GenCSNestedProfile(
94 "gen-cs-nested-profile", cl::Hidden, cl::init(true),
95 cl::desc("Generate nested function profiles for CSSPGO"));
96
97cl::opt<bool> InferMissingFrames(
98 "infer-missing-frames", llvm::cl::init(true),
99 llvm::cl::desc(
100 "Infer missing call frames due to compiler tail call elimination."),
101 llvm::cl::Optional);
102
103using namespace llvm;
104using namespace sampleprof;
105
106namespace llvm {
107extern cl::opt<int> ProfileSummaryCutoffHot;
108extern cl::opt<bool> UseContextLessSummary;
109
110namespace sampleprof {
111
112// Initialize the MaxCompressionSize to -1 which means no size limit
113int32_t CSProfileGenerator::MaxCompressionSize = -1;
114
115int CSProfileGenerator::MaxContextDepth = -1;
116
117bool ProfileGeneratorBase::UseFSDiscriminator = false;
118
119std::unique_ptr<ProfileGeneratorBase>
120ProfileGeneratorBase::create(ProfiledBinary *Binary,
121 const ContextSampleCounterMap *SampleCounters,
122 bool ProfileIsCS) {
123 std::unique_ptr<ProfileGeneratorBase> Generator;
124 if (ProfileIsCS) {
125 if (Binary->useFSDiscriminator())
126 exitWithError("FS discriminator is not supported in CS profile.");
127 Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
128 } else {
129 Generator.reset(new ProfileGenerator(Binary, SampleCounters));
130 }
131 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
132 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
133
134 return Generator;
135}
136
137std::unique_ptr<ProfileGeneratorBase>
138ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
139 bool ProfileIsCS) {
140 std::unique_ptr<ProfileGeneratorBase> Generator;
141 if (ProfileIsCS) {
142 if (Binary->useFSDiscriminator())
143 exitWithError("FS discriminator is not supported in CS profile.");
144 Generator.reset(new CSProfileGenerator(Binary, Profiles));
145 } else {
146 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
147 }
148 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
149 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
150
151 return Generator;
152}
153
154void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
155 SampleProfileMap &ProfileMap) {
156 // Populate profile symbol list if extended binary format is used.
157 ProfileSymbolList SymbolList;
158
159 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
160 Binary->populateSymbolListFromDWARF(SymbolList);
161 Writer->setProfileSymbolList(&SymbolList);
162 }
163
164 if (std::error_code EC = Writer->write(ProfileMap))
165 exitWithError(std::move(EC));
166}
167
168void ProfileGeneratorBase::write() {
169 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
170 if (std::error_code EC = WriterOrErr.getError())
171 exitWithError(EC, OutputFilename);
172
173 if (UseMD5) {
174 if (OutputFormat != SPF_Ext_Binary)
175 WithColor::warning() << "-use-md5 is ignored. Specify "
176 "--format=extbinary to enable it\n";
177 else
178 WriterOrErr.get()->setUseMD5();
179 }
180
181 write(std::move(WriterOrErr.get()), ProfileMap);
182}
183
184void ProfileGeneratorBase::showDensitySuggestion(double Density) {
185 if (Density == 0.0)
186 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
187 "set too low. Please check your command.\n";
188 else if (Density < HotFunctionDensityThreshold)
189 WithColor::warning()
190 << "AutoFDO is estimated to optimize better with "
191 << format("%.1f", HotFunctionDensityThreshold / Density)
192 << "x more samples. Please consider increasing sampling rate or "
193 "profiling for longer duration to get more samples.\n";
194
195 if (ShowDensity)
196 outs() << "Minimum profile density for hot functions with top "
197 << format("%.2f",
198 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
199 10000)
200 << "% total samples: " << format("%.1f", Density) << "\n";
201}
202
203double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
204 uint64_t HotCntThreshold) {
205 double Density = DBL_MAX1.7976931348623157e+308;
206 std::vector<const FunctionSamples *> HotFuncs;
207 for (auto &I : Profiles) {
208 auto &FuncSamples = I.second;
209 if (FuncSamples.getTotalSamples() < HotCntThreshold)
210 continue;
211 HotFuncs.emplace_back(&FuncSamples);
212 }
213
214 for (auto *FuncSamples : HotFuncs) {
215 auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
216 if (!Func)
217 continue;
218 uint64_t FuncSize = Func->getFuncSize();
219 if (FuncSize == 0)
220 continue;
221 Density =
222 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
223 FuncSize);
224 }
225
226 return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density;
227}
228
229void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
230 const RangeSample &Ranges) {
231
232 /*
233 Regions may overlap with each other. Using the boundary info, find all
234 disjoint ranges and their sample count. BoundaryPoint contains the count
235 multiple samples begin/end at this points.
236
237 |<--100-->| Sample1
238 |<------200------>| Sample2
239 A B C
240
241 In the example above,
242 Sample1 begins at A, ends at B, its value is 100.
243 Sample2 beings at A, ends at C, its value is 200.
244 For A, BeginCount is the sum of sample begins at A, which is 300 and no
245 samples ends at A, so EndCount is 0.
246 Then boundary points A, B, and C with begin/end counts are:
247 A: (300, 0)
248 B: (0, 100)
249 C: (0, 200)
250 */
251 struct BoundaryPoint {
252 // Sum of sample counts beginning at this point
253 uint64_t BeginCount = UINT64_MAX(18446744073709551615UL);
254 // Sum of sample counts ending at this point
255 uint64_t EndCount = UINT64_MAX(18446744073709551615UL);
256 // Is the begin point of a zero range.
257 bool IsZeroRangeBegin = false;
258 // Is the end point of a zero range.
259 bool IsZeroRangeEnd = false;
260
261 void addBeginCount(uint64_t Count) {
262 if (BeginCount == UINT64_MAX(18446744073709551615UL))
263 BeginCount = 0;
264 BeginCount += Count;
265 }
266
267 void addEndCount(uint64_t Count) {
268 if (EndCount == UINT64_MAX(18446744073709551615UL))
269 EndCount = 0;
270 EndCount += Count;
271 }
272 };
273
274 /*
275 For the above example. With boundary points, follwing logic finds two
276 disjoint region of
277
278 [A,B]: 300
279 [B+1,C]: 200
280
281 If there is a boundary point that both begin and end, the point itself
282 becomes a separate disjoint region. For example, if we have original
283 ranges of
284
285 |<--- 100 --->|
286 |<--- 200 --->|
287 A B C
288
289 there are three boundary points with their begin/end counts of
290
291 A: (100, 0)
292 B: (200, 100)
293 C: (0, 200)
294
295 the disjoint ranges would be
296
297 [A, B-1]: 100
298 [B, B]: 300
299 [B+1, C]: 200.
300
301 Example for zero value range:
302
303 |<--- 100 --->|
304 |<--- 200 --->|
305 |<--------------- 0 ----------------->|
306 A B C D E F
307
308 [A, B-1] : 0
309 [B, C] : 100
310 [C+1, D-1]: 0
311 [D, E] : 200
312 [E+1, F] : 0
313 */
314 std::map<uint64_t, BoundaryPoint> Boundaries;
315
316 for (const auto &Item : Ranges) {
317 assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 318, __extension__
__PRETTY_FUNCTION__))
318 "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 318, __extension__
__PRETTY_FUNCTION__))
;
319 auto &BeginPoint = Boundaries[Item.first.first];
320 auto &EndPoint = Boundaries[Item.first.second];
321 uint64_t Count = Item.second;
322
323 BeginPoint.addBeginCount(Count);
324 EndPoint.addEndCount(Count);
325 if (Count == 0) {
326 BeginPoint.IsZeroRangeBegin = true;
327 EndPoint.IsZeroRangeEnd = true;
328 }
329 }
330
331 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
332 // and the next valid address
333 uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL);
334 int ZeroRangeDepth = 0;
335 uint64_t Count = 0;
336 for (const auto &Item : Boundaries) {
337 uint64_t Address = Item.first;
338 const BoundaryPoint &Point = Item.second;
339 if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) {
340 if (BeginAddress != UINT64_MAX(18446744073709551615UL))
341 DisjointRanges[{BeginAddress, Address - 1}] = Count;
342 Count += Point.BeginCount;
343 BeginAddress = Address;
344 ZeroRangeDepth += Point.IsZeroRangeBegin;
345 }
346 if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) {
347 assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 348, __extension__
__PRETTY_FUNCTION__))
348 "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 348, __extension__
__PRETTY_FUNCTION__))
;
349 DisjointRanges[{BeginAddress, Address}] = Count;
350 assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount &&
"Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 350, __extension__
__PRETTY_FUNCTION__))
;
351 Count -= Point.EndCount;
352 BeginAddress = Address + 1;
353 ZeroRangeDepth -= Point.IsZeroRangeEnd;
354 // If the remaining count is zero and it's no longer in a zero range, this
355 // means we consume all the ranges before, thus mark BeginAddress as
356 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
357 // [<---- 10 ---->]
358 // [<---- 20 ---->]
359 // A B C D
360 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
361 // have the [B+1, C-1] zero range.
362 if (Count == 0 && ZeroRangeDepth == 0)
363 BeginAddress = UINT64_MAX(18446744073709551615UL);
364 }
365 }
366}
367
368void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
369 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
370 uint64_t Count) {
371 // Use the maximum count of samples with same line location
372 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
373
374 // Use duplication factor to compensated for loop unroll/vectorization.
375 // Note that this is only needed when we're taking MAX of the counts at
376 // the location instead of SUM.
377 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
378
379 ErrorOr<uint64_t> R =
380 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
381
382 uint64_t PreviousCount = R ? R.get() : 0;
383 if (PreviousCount <= Count) {
384 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
385 Count - PreviousCount);
386 }
387}
388
389void ProfileGeneratorBase::updateTotalSamples() {
390 for (auto &Item : ProfileMap) {
391 FunctionSamples &FunctionProfile = Item.second;
392 FunctionProfile.updateTotalSamples();
393 }
394}
395
396void ProfileGeneratorBase::updateCallsiteSamples() {
397 for (auto &Item : ProfileMap) {
398 FunctionSamples &FunctionProfile = Item.second;
399 FunctionProfile.updateCallsiteSamples();
400 }
401}
402
403void ProfileGeneratorBase::updateFunctionSamples() {
404 updateCallsiteSamples();
405
406 if (UpdateTotalSamples)
407 updateTotalSamples();
408}
409
410void ProfileGeneratorBase::collectProfiledFunctions() {
411 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
412 if (collectFunctionsFromRawProfile(ProfiledFunctions))
413 Binary->setProfiledFunctions(ProfiledFunctions);
414 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
415 Binary->setProfiledFunctions(ProfiledFunctions);
416 else
417 llvm_unreachable("Unsupported input profile")::llvm::llvm_unreachable_internal("Unsupported input profile"
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 417)
;
418}
419
420bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
421 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
422 if (!SampleCounters)
423 return false;
424 // Go through all the stacks, ranges and branches in sample counters, use
425 // the start of the range to look up the function it belongs and record the
426 // function.
427 for (const auto &CI : *SampleCounters) {
428 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
429 for (auto StackAddr : CtxKey->Context) {
430 if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
431 ProfiledFunctions.insert(FRange->Func);
432 }
433 }
434
435 for (auto Item : CI.second.RangeCounter) {
436 uint64_t StartAddress = Item.first.first;
437 if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
438 ProfiledFunctions.insert(FRange->Func);
439 }
440
441 for (auto Item : CI.second.BranchCounter) {
442 uint64_t SourceAddress = Item.first.first;
443 uint64_t TargetAddress = Item.first.second;
444 if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
445 ProfiledFunctions.insert(FRange->Func);
446 if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
447 ProfiledFunctions.insert(FRange->Func);
448 }
449 }
450 return true;
451}
452
453bool ProfileGenerator::collectFunctionsFromLLVMProfile(
454 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
455 for (const auto &FS : ProfileMap) {
456 if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
457 ProfiledFunctions.insert(Func);
458 }
459 return true;
460}
461
462bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
463 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
464 for (auto *Node : ContextTracker) {
465 if (!Node->getFuncName().empty())
466 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
467 ProfiledFunctions.insert(Func);
468 }
469 return true;
470}
471
472FunctionSamples &
473ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
474 SampleContext Context(FuncName);
475 auto Ret = ProfileMap.emplace(Context, FunctionSamples());
476 if (Ret.second) {
477 FunctionSamples &FProfile = Ret.first->second;
478 FProfile.setContext(Context);
479 }
480 return Ret.first->second;
481}
482
483void ProfileGenerator::generateProfile() {
484 collectProfiledFunctions();
485
486 if (Binary->usePseudoProbes())
487 Binary->decodePseudoProbe();
488
489 if (SampleCounters) {
490 if (Binary->usePseudoProbes()) {
491 generateProbeBasedProfile();
492 } else {
493 generateLineNumBasedProfile();
494 }
495 }
496
497 postProcessProfiles();
498}
499
500void ProfileGenerator::postProcessProfiles() {
501 computeSummaryAndThreshold(ProfileMap);
502 trimColdProfiles(ProfileMap, ColdCountThreshold);
503 calculateAndShowDensity(ProfileMap);
504}
505
506void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
507 uint64_t ColdCntThreshold) {
508 if (!TrimColdProfile)
509 return;
510
511 // Move cold profiles into a tmp container.
512 std::vector<SampleContext> ColdProfiles;
513 for (const auto &I : ProfileMap) {
514 if (I.second.getTotalSamples() < ColdCntThreshold)
515 ColdProfiles.emplace_back(I.first);
516 }
517
518 // Remove the cold profile from ProfileMap.
519 for (const auto &I : ColdProfiles)
520 ProfileMap.erase(I);
521}
522
523void ProfileGenerator::generateLineNumBasedProfile() {
524 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 525, __extension__
__PRETTY_FUNCTION__))
525 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 525, __extension__
__PRETTY_FUNCTION__))
;
526 const SampleCounter &SC = SampleCounters->begin()->second;
527 // Fill in function body samples
528 populateBodySamplesForAllFunctions(SC.RangeCounter);
529 // Fill in boundary sample counts as well as call site samples for calls
530 populateBoundarySamplesForAllFunctions(SC.BranchCounter);
531
532 updateFunctionSamples();
533}
534
535void ProfileGenerator::generateProbeBasedProfile() {
536 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 537, __extension__
__PRETTY_FUNCTION__))
537 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 537, __extension__
__PRETTY_FUNCTION__))
;
538 // Enable pseudo probe functionalities in SampleProf
539 FunctionSamples::ProfileIsProbeBased = true;
540 const SampleCounter &SC = SampleCounters->begin()->second;
541 // Fill in function body samples
542 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
543 // Fill in boundary sample counts as well as call site samples for calls
544 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
545
546 updateFunctionSamples();
547}
548
549void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
550 const RangeSample &RangeCounter) {
551 ProbeCounterMap ProbeCounter;
552 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
553 // inside extractProbesFromRange.
554 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
555 false);
556
557 for (const auto &PI : ProbeCounter) {
558 const MCDecodedPseudoProbe *Probe = PI.first;
559 uint64_t Count = PI.second;
560 SampleContextFrameVector FrameVec;
561 Binary->getInlineContextForProbe(Probe, FrameVec, true);
562 FunctionSamples &FunctionProfile =
563 getLeafProfileAndAddTotalSamples(FrameVec, Count);
564 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
565 if (Probe->isEntry())
566 FunctionProfile.addHeadSamples(Count);
567 }
568}
569
570void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
571 const BranchSample &BranchCounters) {
572 for (const auto &Entry : BranchCounters) {
573 uint64_t SourceAddress = Entry.first.first;
574 uint64_t TargetAddress = Entry.first.second;
575 uint64_t Count = Entry.second;
576 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 576, __extension__
__PRETTY_FUNCTION__))
;
577
578 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
579 if (CalleeName.size() == 0)
580 continue;
581
582 const MCDecodedPseudoProbe *CallProbe =
583 Binary->getCallProbeForAddr(SourceAddress);
584 if (CallProbe == nullptr)
585 continue;
586
587 // Record called target sample and its count.
588 SampleContextFrameVector FrameVec;
589 Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
590
591 if (!FrameVec.empty()) {
592 FunctionSamples &FunctionProfile =
593 getLeafProfileAndAddTotalSamples(FrameVec, 0);
594 FunctionProfile.addCalledTargetSamples(
595 FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
596 }
597 }
598}
599
600FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
601 const SampleContextFrameVector &FrameVec, uint64_t Count) {
602 // Get top level profile
603 FunctionSamples *FunctionProfile =
604 &getTopLevelFunctionProfile(FrameVec[0].FuncName);
605 FunctionProfile->addTotalSamples(Count);
606 if (Binary->usePseudoProbes()) {
607 const auto *FuncDesc = Binary->getFuncDescForGUID(
608 Function::getGUID(FunctionProfile->getName()));
609 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
610 }
611
612 for (size_t I = 1; I < FrameVec.size(); I++) {
613 LineLocation Callsite(
614 FrameVec[I - 1].Location.LineOffset,
615 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
616 FunctionSamplesMap &SamplesMap =
617 FunctionProfile->functionSamplesAt(Callsite);
618 auto Ret =
619 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
620 if (Ret.second) {
621 SampleContext Context(FrameVec[I].FuncName);
622 Ret.first->second.setContext(Context);
623 }
624 FunctionProfile = &Ret.first->second;
625 FunctionProfile->addTotalSamples(Count);
626 if (Binary->usePseudoProbes()) {
627 const auto *FuncDesc = Binary->getFuncDescForGUID(
628 Function::getGUID(FunctionProfile->getName()));
629 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
630 }
631 }
632
633 return *FunctionProfile;
634}
635
636RangeSample
637ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
638 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
639 if (FillZeroForAllFuncs) {
640 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
641 for (auto &R : FuncI.second.Ranges) {
642 Ranges[{R.first, R.second - 1}] += 0;
643 }
644 }
645 } else {
646 // For each range, we search for all ranges of the function it belongs to
647 // and initialize it with zero count, so it remains zero if doesn't hit any
648 // samples. This is to be consistent with compiler that interpret zero count
649 // as unexecuted(cold).
650 for (const auto &I : RangeCounter) {
651 uint64_t StartAddress = I.first.first;
652 for (const auto &Range : Binary->getRanges(StartAddress))
653 Ranges[{Range.first, Range.second - 1}] += 0;
654 }
655 }
656 RangeSample DisjointRanges;
657 findDisjointRanges(DisjointRanges, Ranges);
658 return DisjointRanges;
659}
660
661void ProfileGenerator::populateBodySamplesForAllFunctions(
662 const RangeSample &RangeCounter) {
663 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
664 uint64_t RangeBegin = Range.first.first;
665 uint64_t RangeEnd = Range.first.second;
666 uint64_t Count = Range.second;
667
668 InstructionPointer IP(Binary, RangeBegin, true);
669 // Disjoint ranges may have range in the middle of two instr,
670 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
671 // can be Addr1+1 to Addr2-1. We should ignore such range.
672 if (IP.Address > RangeEnd)
673 continue;
674
675 do {
676 const SampleContextFrameVector FrameVec =
677 Binary->getFrameLocationStack(IP.Address);
678 if (!FrameVec.empty()) {
679 // FIXME: As accumulating total count per instruction caused some
680 // regression, we changed to accumulate total count per byte as a
681 // workaround. Tuning hotness threshold on the compiler side might be
682 // necessary in the future.
683 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
684 FrameVec, Count * Binary->getInstSize(IP.Address));
685 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
686 Count);
687 }
688 } while (IP.advance() && IP.Address <= RangeEnd);
689 }
690}
691
692StringRef
693ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
694 // Get the function range by branch target if it's a call branch.
695 auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
696
697 // We won't accumulate sample count for a range whose start is not the real
698 // function entry such as outlined function or inner labels.
699 if (!FRange || !FRange->IsFuncEntry)
700 return StringRef();
701
702 return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
703}
704
705void ProfileGenerator::populateBoundarySamplesForAllFunctions(
706 const BranchSample &BranchCounters) {
707 for (const auto &Entry : BranchCounters) {
708 uint64_t SourceAddress = Entry.first.first;
709 uint64_t TargetAddress = Entry.first.second;
710 uint64_t Count = Entry.second;
711 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 711, __extension__
__PRETTY_FUNCTION__))
;
712
713 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
714 if (CalleeName.size() == 0)
715 continue;
716 // Record called target sample and its count.
717 const SampleContextFrameVector &FrameVec =
718 Binary->getCachedFrameLocationStack(SourceAddress);
719 if (!FrameVec.empty()) {
720 FunctionSamples &FunctionProfile =
721 getLeafProfileAndAddTotalSamples(FrameVec, 0);
722 FunctionProfile.addCalledTargetSamples(
723 FrameVec.back().Location.LineOffset,
724 getBaseDiscriminator(FrameVec.back().Location.Discriminator),
725 CalleeName, Count);
726 }
727 // Add head samples for callee.
728 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
729 CalleeProfile.addHeadSamples(Count);
730 }
731}
732
733void ProfileGeneratorBase::calculateAndShowDensity(
734 const SampleProfileMap &Profiles) {
735 double Density = calculateDensity(Profiles, HotCountThreshold);
736 showDensitySuggestion(Density);
737}
738
739FunctionSamples *
740CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
741 bool WasLeafInlined) {
742 FunctionSamples *FProfile = ContextNode->getFunctionSamples();
743 if (!FProfile) {
744 FSamplesList.emplace_back();
745 FProfile = &FSamplesList.back();
746 FProfile->setName(ContextNode->getFuncName());
747 ContextNode->setFunctionSamples(FProfile);
748 }
749 // Update ContextWasInlined attribute for existing contexts.
750 // The current function can be called in two ways:
751 // - when processing a probe of the current frame
752 // - when processing the entry probe of an inlinee's frame, which
753 // is then used to update the callsite count of the current frame.
754 // The two can happen in any order, hence here we are making sure
755 // `ContextWasInlined` is always set as expected.
756 // TODO: Note that the former does not always happen if no probes of the
757 // current frame has samples, and if the latter happens, we could lose the
758 // attribute. This should be fixed.
759 if (WasLeafInlined)
760 FProfile->getContext().setAttribute(ContextWasInlined);
761 return FProfile;
762}
763
764ContextTrieNode *
765CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
766 bool WasLeafInlined) {
767 ContextTrieNode *ContextNode =
768 ContextTracker.getOrCreateContextPath(Context, true);
769 getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
770 return ContextNode;
771}
772
773void CSProfileGenerator::generateProfile() {
774 FunctionSamples::ProfileIsCS = true;
775
776 collectProfiledFunctions();
777
778 if (Binary->usePseudoProbes()) {
1
Assuming the condition is true
2
Taking true branch
779 Binary->decodePseudoProbe();
780 if (InferMissingFrames)
3
Assuming the condition is true
4
Taking true branch
781 initializeMissingFrameInferrer();
782 }
783
784 if (SampleCounters
4.1
Field 'SampleCounters' is non-null
) {
5
Taking true branch
785 if (Binary->usePseudoProbes()) {
6
Assuming the condition is true
7
Taking true branch
786 generateProbeBasedProfile();
8
Calling 'CSProfileGenerator::generateProbeBasedProfile'
787 } else {
788 generateLineNumBasedProfile();
789 }
790 }
791
792 if (Binary->getTrackFuncContextSize())
793 computeSizeForProfiledFunctions();
794
795 postProcessProfiles();
796}
797
798void CSProfileGenerator::initializeMissingFrameInferrer() {
799 Binary->getMissingContextInferrer()->initialize(SampleCounters);
800}
801
802void CSProfileGenerator::inferMissingFrames(
803 const SmallVectorImpl<uint64_t> &Context,
804 SmallVectorImpl<uint64_t> &NewContext) {
805 Binary->inferMissingFrames(Context, NewContext);
806}
807
808void CSProfileGenerator::computeSizeForProfiledFunctions() {
809 for (auto *Func : Binary->getProfiledFunctions())
810 Binary->computeInlinedContextSizeForFunc(Func);
811
812 // Flush the symbolizer to save memory.
813 Binary->flushSymbolizer();
814}
815
816void CSProfileGenerator::updateFunctionSamples() {
817 for (auto *Node : ContextTracker) {
818 FunctionSamples *FSamples = Node->getFunctionSamples();
819 if (FSamples) {
820 if (UpdateTotalSamples)
821 FSamples->updateTotalSamples();
822 FSamples->updateCallsiteSamples();
823 }
824 }
825}
826
827void CSProfileGenerator::generateLineNumBasedProfile() {
828 for (const auto &CI : *SampleCounters) {
829 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
830
831 ContextTrieNode *ContextNode = &getRootContext();
832 // Sample context will be empty if the jump is an external-to-internal call
833 // pattern, the head samples should be added for the internal function.
834 if (!CtxKey->Context.empty()) {
835 // Get or create function profile for the range
836 ContextNode =
837 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
838 // Fill in function body samples
839 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
840 CI.second.RangeCounter);
841 }
842 // Fill in boundary sample counts as well as call site samples for calls
843 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
844 }
845 // Fill in call site value sample for inlined calls and also use context to
846 // infer missing samples. Since we don't have call count for inlined
847 // functions, we estimate it from inlinee's profile using the entry of the
848 // body sample.
849 populateInferredFunctionSamples(getRootContext());
850
851 updateFunctionSamples();
852}
853
854void CSProfileGenerator::populateBodySamplesForFunction(
855 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
856 // Compute disjoint ranges first, so we can use MAX
857 // for calculating count for each location.
858 RangeSample Ranges;
859 findDisjointRanges(Ranges, RangeCounter);
860 for (const auto &Range : Ranges) {
861 uint64_t RangeBegin = Range.first.first;
862 uint64_t RangeEnd = Range.first.second;
863 uint64_t Count = Range.second;
864 // Disjoint ranges have introduce zero-filled gap that
865 // doesn't belong to current context, filter them out.
866 if (Count == 0)
867 continue;
868
869 InstructionPointer IP(Binary, RangeBegin, true);
870 // Disjoint ranges may have range in the middle of two instr,
871 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
872 // can be Addr1+1 to Addr2-1. We should ignore such range.
873 if (IP.Address > RangeEnd)
874 continue;
875
876 do {
877 auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
878 if (LeafLoc) {
879 // Recording body sample for this specific context
880 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
881 FunctionProfile.addTotalSamples(Count);
882 }
883 } while (IP.advance() && IP.Address <= RangeEnd);
884 }
885}
886
887void CSProfileGenerator::populateBoundarySamplesForFunction(
888 ContextTrieNode *Node, const BranchSample &BranchCounters) {
889
890 for (const auto &Entry : BranchCounters) {
891 uint64_t SourceAddress = Entry.first.first;
892 uint64_t TargetAddress = Entry.first.second;
893 uint64_t Count = Entry.second;
894 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 894, __extension__
__PRETTY_FUNCTION__))
;
895
896 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
897 if (CalleeName.size() == 0)
898 continue;
899
900 ContextTrieNode *CallerNode = Node;
901 LineLocation CalleeCallSite(0, 0);
902 if (CallerNode != &getRootContext()) {
903 // Record called target sample and its count
904 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
905 if (LeafLoc) {
906 CallerNode->getFunctionSamples()->addCalledTargetSamples(
907 LeafLoc->Location.LineOffset,
908 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
909 Count);
910 // Record head sample for called target(callee)
911 CalleeCallSite = LeafLoc->Location;
912 }
913 }
914
915 ContextTrieNode *CalleeNode =
916 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
917 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
918 CalleeProfile->addHeadSamples(Count);
919 }
920}
921
922void CSProfileGenerator::populateInferredFunctionSamples(
923 ContextTrieNode &Node) {
924 // There is no call jmp sample between the inliner and inlinee, we need to use
925 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
926 // sample depends on child(inlinee)'s sample, so traverse the tree in
927 // post-order.
928 for (auto &It : Node.getAllChildContext())
929 populateInferredFunctionSamples(It.second);
930
931 FunctionSamples *CalleeProfile = Node.getFunctionSamples();
932 if (!CalleeProfile)
933 return;
934 // If we already have head sample counts, we must have value profile
935 // for call sites added already. Skip to avoid double counting.
936 if (CalleeProfile->getHeadSamples())
937 return;
938 ContextTrieNode *CallerNode = Node.getParentContext();
939 // If we don't have context, nothing to do for caller's call site.
940 // This could happen for entry point function.
941 if (CallerNode == &getRootContext())
942 return;
943
944 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
945 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
946 // Since we don't have call count for inlined functions, we
947 // estimate it from inlinee's profile using entry body sample.
948 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
949 // If we don't have samples with location, use 1 to indicate live.
950 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
951 EstimatedCallCount = 1;
952 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
953 CallerLeafFrameLoc.Discriminator,
954 Node.getFuncName(), EstimatedCallCount);
955 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
956 CallerLeafFrameLoc.Discriminator,
957 EstimatedCallCount);
958 CallerProfile.addTotalSamples(EstimatedCallCount);
959}
960
961void CSProfileGenerator::convertToProfileMap(
962 ContextTrieNode &Node, SampleContextFrameVector &Context) {
963 FunctionSamples *FProfile = Node.getFunctionSamples();
964 if (FProfile) {
965 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
966 // Save the new context for future references.
967 SampleContextFrames NewContext = *Contexts.insert(Context).first;
968 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
969 FunctionSamples &NewProfile = Ret.first->second;
970 NewProfile.getContext().setContext(NewContext);
971 Context.pop_back();
972 }
973
974 for (auto &It : Node.getAllChildContext()) {
975 ContextTrieNode &ChildNode = It.second;
976 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
977 convertToProfileMap(ChildNode, Context);
978 Context.pop_back();
979 }
980}
981
982void CSProfileGenerator::convertToProfileMap() {
983 assert(ProfileMap.empty() &&(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 984, __extension__
__PRETTY_FUNCTION__))
984 "ProfileMap should be empty before converting from the trie")(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 984, __extension__
__PRETTY_FUNCTION__))
;
985 assert(IsProfileValidOnTrie &&(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 986, __extension__
__PRETTY_FUNCTION__))
986 "Do not convert the trie twice, it's already destroyed")(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 986, __extension__
__PRETTY_FUNCTION__))
;
987
988 SampleContextFrameVector Context;
989 for (auto &It : getRootContext().getAllChildContext())
990 convertToProfileMap(It.second, Context);
991
992 IsProfileValidOnTrie = false;
993}
994
995void CSProfileGenerator::postProcessProfiles() {
996 // Compute hot/cold threshold based on profile. This will be used for cold
997 // context profile merging/trimming.
998 computeSummaryAndThreshold();
999
1000 // Run global pre-inliner to adjust/merge context profile based on estimated
1001 // inline decisions.
1002 if (EnableCSPreInliner) {
1003 ContextTracker.populateFuncToCtxtMap();
1004 CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1005 // Turn off the profile merger by default unless it is explicitly enabled.
1006 if (!CSProfMergeColdContext.getNumOccurrences())
1007 CSProfMergeColdContext = false;
1008 }
1009
1010 convertToProfileMap();
1011
1012 // Trim and merge cold context profile using cold threshold above.
1013 if (TrimColdProfile || CSProfMergeColdContext) {
1014 SampleContextTrimmer(ProfileMap)
1015 .trimAndMergeColdContextProfiles(
1016 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1017 CSProfMaxColdContextDepth, EnableCSPreInliner);
1018 }
1019
1020 // Merge function samples of CS profile to calculate profile density.
1021 sampleprof::SampleProfileMap ContextLessProfiles;
1022 for (const auto &I : ProfileMap) {
1023 ContextLessProfiles[I.second.getName()].merge(I.second);
1024 }
1025
1026 calculateAndShowDensity(ContextLessProfiles);
1027 if (GenCSNestedProfile) {
1028 ProfileConverter CSConverter(ProfileMap);
1029 CSConverter.convertCSProfiles();
1030 FunctionSamples::ProfileIsCS = false;
1031 }
1032}
1033
1034void ProfileGeneratorBase::computeSummaryAndThreshold(
1035 SampleProfileMap &Profiles) {
1036 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1037 Summary = Builder.computeSummaryForProfiles(Profiles);
1038 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1039 (Summary->getDetailedSummary()));
1040 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1041 (Summary->getDetailedSummary()));
1042}
1043
1044void CSProfileGenerator::computeSummaryAndThreshold() {
1045 // Always merge and use context-less profile map to compute summary.
1046 SampleProfileMap ContextLessProfiles;
1047 ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1048
1049 // Set the flag below to avoid merging the profile again in
1050 // computeSummaryAndThreshold
1051 FunctionSamples::ProfileIsCS = false;
1052 assert((static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__
__PRETTY_FUNCTION__))
1053 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__
__PRETTY_FUNCTION__))
1054 "Don't set --profile-summary-contextless to false for profile "(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__
__PRETTY_FUNCTION__))
1055 "generation")(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__
__PRETTY_FUNCTION__))
;
1056 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1057 // Recover the old value.
1058 FunctionSamples::ProfileIsCS = true;
1059}
1060
1061void ProfileGeneratorBase::extractProbesFromRange(
1062 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1063 bool FindDisjointRanges) {
1064 const RangeSample *PRanges = &RangeCounter;
1065 RangeSample Ranges;
1066 if (FindDisjointRanges) {
1067 findDisjointRanges(Ranges, RangeCounter);
1068 PRanges = &Ranges;
1069 }
1070
1071 for (const auto &Range : *PRanges) {
1072 uint64_t RangeBegin = Range.first.first;
1073 uint64_t RangeEnd = Range.first.second;
1074 uint64_t Count = Range.second;
1075
1076 InstructionPointer IP(Binary, RangeBegin, true);
1077 // Disjoint ranges may have range in the middle of two instr,
1078 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1079 // can be Addr1+1 to Addr2-1. We should ignore such range.
1080 if (IP.Address > RangeEnd)
1081 continue;
1082
1083 do {
1084 const AddressProbesMap &Address2ProbesMap =
1085 Binary->getAddress2ProbesMap();
1086 auto It = Address2ProbesMap.find(IP.Address);
1087 if (It != Address2ProbesMap.end()) {
1088 for (const auto &Probe : It->second) {
1089 ProbeCounter[&Probe] += Count;
1090 }
1091 }
1092 } while (IP.advance() && IP.Address <= RangeEnd);
1093 }
1094}
1095
1096static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1097 const SmallVectorImpl<uint64_t> &AddrVec,
1098 ProfiledBinary *Binary) {
1099 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1100 for (auto Address : reverse(AddrVec)) {
1101 const MCDecodedPseudoProbe *CallProbe =
1102 Binary->getCallProbeForAddr(Address);
1103 // These could be the cases when a probe is not found at a calliste. Cutting
1104 // off the context from here since the inliner will not know how to consume
1105 // a context with unknown callsites.
1106 // 1. for functions that are not sampled when
1107 // --decode-probe-for-profiled-functions-only is on.
1108 // 2. for a merged callsite. Callsite merging may cause the loss of original
1109 // probe IDs.
1110 // 3. for an external callsite.
1111 if (!CallProbe)
1112 break;
1113 Probes.push_back(CallProbe);
1114 }
1115
1116 std::reverse(Probes.begin(), Probes.end());
1117
1118 // Extract context stack for reusing, leaf context stack will be added
1119 // compressed while looking up function profile.
1120 for (const auto *P : Probes) {
1121 Binary->getInlineContextForProbe(P, ContextStack, true);
1122 }
1123}
1124
1125void CSProfileGenerator::generateProbeBasedProfile() {
1126 // Enable pseudo probe functionalities in SampleProf
1127 FunctionSamples::ProfileIsProbeBased = true;
1128 for (const auto &CI : *SampleCounters) {
1129 const AddrBasedCtxKey *CtxKey =
10
'CtxKey' initialized to a null pointer value
1130 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
9
Assuming the object is not a 'CastReturnType'
1131 // Fill in function body samples from probes, also infer caller's samples
1132 // from callee's probe
1133 populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
11
Passing null pointer value via 2nd parameter 'CtxKey'
12
Calling 'CSProfileGenerator::populateBodySamplesWithProbes'
1134 // Fill in boundary samples for a call probe
1135 populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
1136 }
1137}
1138
1139void CSProfileGenerator::populateBodySamplesWithProbes(
1140 const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1141 ProbeCounterMap ProbeCounter;
1142 // Extract the top frame probes by looking up each address among the range in
1143 // the Address2ProbeMap
1144 extractProbesFromRange(RangeCounter, ProbeCounter);
1145 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1146 std::unordered_set<FunctionSamples *>>
1147 FrameSamples;
1148 for (const auto &PI : ProbeCounter) {
1149 const MCDecodedPseudoProbe *Probe = PI.first;
1150 uint64_t Count = PI.second;
1151 // Disjoint ranges have introduce zero-filled gap that
1152 // doesn't belong to current context, filter them out.
1153 if (!Probe->isBlock() || Count == 0)
13
Assuming 'Count' is not equal to 0
14
Taking false branch
1154 continue;
1155
1156 ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
15
Passing null pointer value via 1st parameter 'CtxKey'
16
Calling 'CSProfileGenerator::getContextNodeForLeafProbe'
1157 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1158 // Record the current frame and FunctionProfile whenever samples are
1159 // collected for non-danglie probes. This is for reporting all of the
1160 // zero count probes of the frame later.
1161 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1162 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1163 FunctionProfile.addTotalSamples(Count);
1164 if (Probe->isEntry()) {
1165 FunctionProfile.addHeadSamples(Count);
1166 // Look up for the caller's function profile
1167 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1168 ContextTrieNode *CallerNode = ContextNode->getParentContext();
1169 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1170 // Since the context id will be compressed, we have to use callee's
1171 // context id to infer caller's context id to ensure they share the
1172 // same context prefix.
1173 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1174 assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1175, __extension__
__PRETTY_FUNCTION__))
1175 "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1175, __extension__
__PRETTY_FUNCTION__))
;
1176 FunctionSamples &CallerProfile =
1177 *getOrCreateFunctionSamples(CallerNode);
1178 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1179 CallerProfile.addBodySamples(CallerIndex, 0, Count);
1180 CallerProfile.addTotalSamples(Count);
1181 CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1182 ContextNode->getFuncName(), Count);
1183 }
1184 }
1185 }
1186
1187 // Assign zero count for remaining probes without sample hits to
1188 // differentiate from probes optimized away, of which the counts are unknown
1189 // and will be inferred by the compiler.
1190 for (auto &I : FrameSamples) {
1191 for (auto *FunctionProfile : I.second) {
1192 for (auto *Probe : I.first->getProbes()) {
1193 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1194 }
1195 }
1196 }
1197}
1198
1199void CSProfileGenerator::populateBoundarySamplesWithProbes(
1200 const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1201 for (const auto &BI : BranchCounter) {
1202 uint64_t SourceAddress = BI.first.first;
1203 uint64_t TargetAddress = BI.first.second;
1204 uint64_t Count = BI.second;
1205 const MCDecodedPseudoProbe *CallProbe =
1206 Binary->getCallProbeForAddr(SourceAddress);
1207 if (CallProbe == nullptr)
1208 continue;
1209 FunctionSamples &FunctionProfile =
1210 getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1211 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1212 FunctionProfile.addTotalSamples(Count);
1213 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1214 if (CalleeName.size() == 0)
1215 continue;
1216 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1217 Count);
1218 }
1219}
1220
1221ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1222 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1223
1224 const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1225 SmallVector<uint64_t, 16> NewContext;
1226
1227 if (InferMissingFrames) {
17
Assuming the condition is true
18
Taking true branch
1228 SmallVector<uint64_t, 16> Context = CtxKey->Context;
19
Forming reference to null pointer
1229 // Append leaf frame for a complete inference.
1230 Context.push_back(LeafProbe->getAddress());
1231 inferMissingFrames(Context, NewContext);
1232 // Pop out the leaf probe that was pushed in above.
1233 NewContext.pop_back();
1234 PContext = &NewContext;
1235 }
1236
1237 SampleContextFrameVector ContextStack;
1238 extractPrefixContextStack(ContextStack, *PContext, Binary);
1239
1240 // Explicitly copy the context for appending the leaf context
1241 SampleContextFrameVector NewContextStack(ContextStack.begin(),
1242 ContextStack.end());
1243 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1244 // For leaf inlined context with the top frame, we should strip off the top
1245 // frame's probe id, like:
1246 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1247 auto LeafFrame = NewContextStack.back();
1248 LeafFrame.Location = LineLocation(0, 0);
1249 NewContextStack.pop_back();
1250 // Compress the context string except for the leaf frame
1251 CSProfileGenerator::compressRecursionContext(NewContextStack);
1252 CSProfileGenerator::trimContext(NewContextStack);
1253 NewContextStack.push_back(LeafFrame);
1254
1255 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1256 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1257 ContextTrieNode *ContextNode =
1258 getOrCreateContextNode(NewContextStack, WasLeafInlined);
1259 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1260 return ContextNode;
1261}
1262
1263FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1264 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1265 return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1266}
1267
1268} // end namespace sampleprof
1269} // end namespace llvm