Bug Summary

File:build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/tools/llvm-profgen/ProfileGenerator.cpp
Warning:line 1117, column 5
Forming reference to null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ProfileGenerator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/llvm-profgen -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/tools/llvm-profgen -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/tools/llvm-profgen/ProfileGenerator.cpp
1//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "ProfileGenerator.h"
9#include "ErrorHandling.h"
10#include "PerfReader.h"
11#include "ProfiledBinary.h"
12#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13#include "llvm/ProfileData/ProfileCommon.h"
14#include <algorithm>
15#include <float.h>
16#include <unordered_set>
17#include <utility>
18
19cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
20 cl::Required,
21 cl::desc("Output profile file"));
22static cl::alias OutputA("o", cl::desc("Alias for --output"),
23 cl::aliasopt(OutputFilename));
24
25static cl::opt<SampleProfileFormat> OutputFormat(
26 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27 cl::values(
28 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)"
}
,
29 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding")llvm::cl::OptionEnumValue { "compbinary", int(SPF_Compact_Binary
), "Compact binary encoding" }
,
30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary),
"Extensible binary encoding" }
,
31 clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding"
}
,
32 clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
33 "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
));
34
35cl::opt<bool> UseMD5(
36 "use-md5", cl::init(false), cl::Hidden,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
39
40static cl::opt<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43
44static cl::opt<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
48
49static cl::opt<int32_t, true> RecursionCompression(
50 "compress-recursion",
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
53 cl::Hidden,
54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55
56static cl::opt<bool>
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
60
61static cl::opt<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
65 "profile."));
66
67static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
71
72static cl::opt<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
75 "depth limit."),
76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77
78static cl::opt<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(1000),
80 llvm::cl::desc(
81 "specify density threshold for hot functions (default: 1000)"),
82 llvm::cl::Optional);
83static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84 llvm::cl::desc("show profile density details"),
85 llvm::cl::Optional);
86
87static cl::opt<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(false),
89 llvm::cl::desc(
90 "Update total samples by accumulating all its body samples."),
91 llvm::cl::Optional);
92
93extern cl::opt<int> ProfileSummaryCutoffHot;
94extern cl::opt<bool> UseContextLessSummary;
95
96static cl::opt<bool> GenCSNestedProfile(
97 "gen-cs-nested-profile", cl::Hidden, cl::init(true),
98 cl::desc("Generate nested function profiles for CSSPGO"));
99
100using namespace llvm;
101using namespace sampleprof;
102
103namespace llvm {
104namespace sampleprof {
105
106// Initialize the MaxCompressionSize to -1 which means no size limit
107int32_t CSProfileGenerator::MaxCompressionSize = -1;
108
109int CSProfileGenerator::MaxContextDepth = -1;
110
111bool ProfileGeneratorBase::UseFSDiscriminator = false;
112
113std::unique_ptr<ProfileGeneratorBase>
114ProfileGeneratorBase::create(ProfiledBinary *Binary,
115 const ContextSampleCounterMap *SampleCounters,
116 bool ProfileIsCS) {
117 std::unique_ptr<ProfileGeneratorBase> Generator;
118 if (ProfileIsCS) {
119 if (Binary->useFSDiscriminator())
120 exitWithError("FS discriminator is not supported in CS profile.");
121 Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
122 } else {
123 Generator.reset(new ProfileGenerator(Binary, SampleCounters));
124 }
125 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
126 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
127
128 return Generator;
129}
130
131std::unique_ptr<ProfileGeneratorBase>
132ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
133 bool ProfileIsCS) {
134 std::unique_ptr<ProfileGeneratorBase> Generator;
135 if (ProfileIsCS) {
136 if (Binary->useFSDiscriminator())
137 exitWithError("FS discriminator is not supported in CS profile.");
138 Generator.reset(new CSProfileGenerator(Binary, Profiles));
139 } else {
140 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141 }
142 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144
145 return Generator;
146}
147
148void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149 SampleProfileMap &ProfileMap) {
150 // Populate profile symbol list if extended binary format is used.
151 ProfileSymbolList SymbolList;
152
153 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154 Binary->populateSymbolListFromDWARF(SymbolList);
155 Writer->setProfileSymbolList(&SymbolList);
156 }
157
158 if (std::error_code EC = Writer->write(ProfileMap))
159 exitWithError(std::move(EC));
160}
161
162void ProfileGeneratorBase::write() {
163 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164 if (std::error_code EC = WriterOrErr.getError())
165 exitWithError(EC, OutputFilename);
166
167 if (UseMD5) {
168 if (OutputFormat != SPF_Ext_Binary)
169 WithColor::warning() << "-use-md5 is ignored. Specify "
170 "--format=extbinary to enable it\n";
171 else
172 WriterOrErr.get()->setUseMD5();
173 }
174
175 write(std::move(WriterOrErr.get()), ProfileMap);
176}
177
178void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179 if (Density == 0.0)
180 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181 "set too low. Please check your command.\n";
182 else if (Density < HotFunctionDensityThreshold)
183 WithColor::warning()
184 << "AutoFDO is estimated to optimize better with "
185 << format("%.1f", HotFunctionDensityThreshold / Density)
186 << "x more samples. Please consider increasing sampling rate or "
187 "profiling for longer duration to get more samples.\n";
188
189 if (ShowDensity)
190 outs() << "Minimum profile density for hot functions with top "
191 << format("%.2f",
192 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193 10000)
194 << "% total samples: " << format("%.1f", Density) << "\n";
195}
196
197double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198 uint64_t HotCntThreshold) {
199 double Density = DBL_MAX1.7976931348623157e+308;
200 std::vector<const FunctionSamples *> HotFuncs;
201 for (auto &I : Profiles) {
202 auto &FuncSamples = I.second;
203 if (FuncSamples.getTotalSamples() < HotCntThreshold)
204 continue;
205 HotFuncs.emplace_back(&FuncSamples);
206 }
207
208 for (auto *FuncSamples : HotFuncs) {
209 auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210 if (!Func)
211 continue;
212 uint64_t FuncSize = Func->getFuncSize();
213 if (FuncSize == 0)
214 continue;
215 Density =
216 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217 FuncSize);
218 }
219
220 return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density;
221}
222
223void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
224 const RangeSample &Ranges) {
225
226 /*
227 Regions may overlap with each other. Using the boundary info, find all
228 disjoint ranges and their sample count. BoundaryPoint contains the count
229 multiple samples begin/end at this points.
230
231 |<--100-->| Sample1
232 |<------200------>| Sample2
233 A B C
234
235 In the example above,
236 Sample1 begins at A, ends at B, its value is 100.
237 Sample2 beings at A, ends at C, its value is 200.
238 For A, BeginCount is the sum of sample begins at A, which is 300 and no
239 samples ends at A, so EndCount is 0.
240 Then boundary points A, B, and C with begin/end counts are:
241 A: (300, 0)
242 B: (0, 100)
243 C: (0, 200)
244 */
245 struct BoundaryPoint {
246 // Sum of sample counts beginning at this point
247 uint64_t BeginCount = UINT64_MAX(18446744073709551615UL);
248 // Sum of sample counts ending at this point
249 uint64_t EndCount = UINT64_MAX(18446744073709551615UL);
250 // Is the begin point of a zero range.
251 bool IsZeroRangeBegin = false;
252 // Is the end point of a zero range.
253 bool IsZeroRangeEnd = false;
254
255 void addBeginCount(uint64_t Count) {
256 if (BeginCount == UINT64_MAX(18446744073709551615UL))
257 BeginCount = 0;
258 BeginCount += Count;
259 }
260
261 void addEndCount(uint64_t Count) {
262 if (EndCount == UINT64_MAX(18446744073709551615UL))
263 EndCount = 0;
264 EndCount += Count;
265 }
266 };
267
268 /*
269 For the above example. With boundary points, follwing logic finds two
270 disjoint region of
271
272 [A,B]: 300
273 [B+1,C]: 200
274
275 If there is a boundary point that both begin and end, the point itself
276 becomes a separate disjoint region. For example, if we have original
277 ranges of
278
279 |<--- 100 --->|
280 |<--- 200 --->|
281 A B C
282
283 there are three boundary points with their begin/end counts of
284
285 A: (100, 0)
286 B: (200, 100)
287 C: (0, 200)
288
289 the disjoint ranges would be
290
291 [A, B-1]: 100
292 [B, B]: 300
293 [B+1, C]: 200.
294
295 Example for zero value range:
296
297 |<--- 100 --->|
298 |<--- 200 --->|
299 |<--------------- 0 ----------------->|
300 A B C D E F
301
302 [A, B-1] : 0
303 [B, C] : 100
304 [C+1, D-1]: 0
305 [D, E] : 200
306 [E+1, F] : 0
307 */
308 std::map<uint64_t, BoundaryPoint> Boundaries;
309
310 for (const auto &Item : Ranges) {
311 assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 312, __extension__
__PRETTY_FUNCTION__))
312 "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 312, __extension__
__PRETTY_FUNCTION__))
;
313 auto &BeginPoint = Boundaries[Item.first.first];
314 auto &EndPoint = Boundaries[Item.first.second];
315 uint64_t Count = Item.second;
316
317 BeginPoint.addBeginCount(Count);
318 EndPoint.addEndCount(Count);
319 if (Count == 0) {
320 BeginPoint.IsZeroRangeBegin = true;
321 EndPoint.IsZeroRangeEnd = true;
322 }
323 }
324
325 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
326 // and the next valid address
327 uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL);
328 int ZeroRangeDepth = 0;
329 uint64_t Count = 0;
330 for (const auto &Item : Boundaries) {
331 uint64_t Address = Item.first;
332 const BoundaryPoint &Point = Item.second;
333 if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) {
334 if (BeginAddress != UINT64_MAX(18446744073709551615UL))
335 DisjointRanges[{BeginAddress, Address - 1}] = Count;
336 Count += Point.BeginCount;
337 BeginAddress = Address;
338 ZeroRangeDepth += Point.IsZeroRangeBegin;
339 }
340 if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) {
341 assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 342, __extension__
__PRETTY_FUNCTION__))
342 "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 342, __extension__
__PRETTY_FUNCTION__))
;
343 DisjointRanges[{BeginAddress, Address}] = Count;
344 assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount &&
"Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 344, __extension__
__PRETTY_FUNCTION__))
;
345 Count -= Point.EndCount;
346 BeginAddress = Address + 1;
347 ZeroRangeDepth -= Point.IsZeroRangeEnd;
348 // If the remaining count is zero and it's no longer in a zero range, this
349 // means we consume all the ranges before, thus mark BeginAddress as
350 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
351 // [<---- 10 ---->]
352 // [<---- 20 ---->]
353 // A B C D
354 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
355 // have the [B+1, C-1] zero range.
356 if (Count == 0 && ZeroRangeDepth == 0)
357 BeginAddress = UINT64_MAX(18446744073709551615UL);
358 }
359 }
360}
361
362void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364 uint64_t Count) {
365 // Use the maximum count of samples with same line location
366 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367
368 // Use duplication factor to compensated for loop unroll/vectorization.
369 // Note that this is only needed when we're taking MAX of the counts at
370 // the location instead of SUM.
371 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372
373 ErrorOr<uint64_t> R =
374 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375
376 uint64_t PreviousCount = R ? R.get() : 0;
377 if (PreviousCount <= Count) {
378 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379 Count - PreviousCount);
380 }
381}
382
383void ProfileGeneratorBase::updateTotalSamples() {
384 for (auto &Item : ProfileMap) {
385 FunctionSamples &FunctionProfile = Item.second;
386 FunctionProfile.updateTotalSamples();
387 }
388}
389
390void ProfileGeneratorBase::updateCallsiteSamples() {
391 for (auto &Item : ProfileMap) {
392 FunctionSamples &FunctionProfile = Item.second;
393 FunctionProfile.updateCallsiteSamples();
394 }
395}
396
397void ProfileGeneratorBase::updateFunctionSamples() {
398 updateCallsiteSamples();
399
400 if (UpdateTotalSamples)
401 updateTotalSamples();
402}
403
404void ProfileGeneratorBase::collectProfiledFunctions() {
405 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
406 if (collectFunctionsFromRawProfile(ProfiledFunctions))
407 Binary->setProfiledFunctions(ProfiledFunctions);
408 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
409 Binary->setProfiledFunctions(ProfiledFunctions);
410 else
411 llvm_unreachable("Unsupported input profile")::llvm::llvm_unreachable_internal("Unsupported input profile"
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 411)
;
412}
413
414bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
415 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
416 if (!SampleCounters)
417 return false;
418 // Go through all the stacks, ranges and branches in sample counters, use
419 // the start of the range to look up the function it belongs and record the
420 // function.
421 for (const auto &CI : *SampleCounters) {
422 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
423 for (auto Addr : CtxKey->Context) {
424 if (FuncRange *FRange = Binary->findFuncRangeForOffset(
425 Binary->virtualAddrToOffset(Addr)))
426 ProfiledFunctions.insert(FRange->Func);
427 }
428 }
429
430 for (auto Item : CI.second.RangeCounter) {
431 uint64_t StartOffset = Item.first.first;
432 if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
433 ProfiledFunctions.insert(FRange->Func);
434 }
435
436 for (auto Item : CI.second.BranchCounter) {
437 uint64_t SourceOffset = Item.first.first;
438 uint64_t TargetOffset = Item.first.first;
439 if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
440 ProfiledFunctions.insert(FRange->Func);
441 if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
442 ProfiledFunctions.insert(FRange->Func);
443 }
444 }
445 return true;
446}
447
448bool ProfileGenerator::collectFunctionsFromLLVMProfile(
449 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
450 for (const auto &FS : ProfileMap) {
451 if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
452 ProfiledFunctions.insert(Func);
453 }
454 return true;
455}
456
457bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
458 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
459 for (auto *Node : ContextTracker) {
460 if (!Node->getFuncName().empty())
461 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
462 ProfiledFunctions.insert(Func);
463 }
464 return true;
465}
466
467FunctionSamples &
468ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
469 SampleContext Context(FuncName);
470 auto Ret = ProfileMap.emplace(Context, FunctionSamples());
471 if (Ret.second) {
472 FunctionSamples &FProfile = Ret.first->second;
473 FProfile.setContext(Context);
474 }
475 return Ret.first->second;
476}
477
478void ProfileGenerator::generateProfile() {
479 collectProfiledFunctions();
480
481 if (Binary->usePseudoProbes())
482 Binary->decodePseudoProbe();
483
484 if (SampleCounters) {
485 if (Binary->usePseudoProbes()) {
486 generateProbeBasedProfile();
487 } else {
488 generateLineNumBasedProfile();
489 }
490 }
491
492 postProcessProfiles();
493}
494
495void ProfileGenerator::postProcessProfiles() {
496 computeSummaryAndThreshold(ProfileMap);
497 trimColdProfiles(ProfileMap, ColdCountThreshold);
498 calculateAndShowDensity(ProfileMap);
499}
500
501void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
502 uint64_t ColdCntThreshold) {
503 if (!TrimColdProfile)
504 return;
505
506 // Move cold profiles into a tmp container.
507 std::vector<SampleContext> ColdProfiles;
508 for (const auto &I : ProfileMap) {
509 if (I.second.getTotalSamples() < ColdCntThreshold)
510 ColdProfiles.emplace_back(I.first);
511 }
512
513 // Remove the cold profile from ProfileMap.
514 for (const auto &I : ColdProfiles)
515 ProfileMap.erase(I);
516}
517
518void ProfileGenerator::generateLineNumBasedProfile() {
519 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 520, __extension__
__PRETTY_FUNCTION__))
520 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 520, __extension__
__PRETTY_FUNCTION__))
;
521 const SampleCounter &SC = SampleCounters->begin()->second;
522 // Fill in function body samples
523 populateBodySamplesForAllFunctions(SC.RangeCounter);
524 // Fill in boundary sample counts as well as call site samples for calls
525 populateBoundarySamplesForAllFunctions(SC.BranchCounter);
526
527 updateFunctionSamples();
528}
529
530void ProfileGenerator::generateProbeBasedProfile() {
531 assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 532, __extension__
__PRETTY_FUNCTION__))
532 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 532, __extension__
__PRETTY_FUNCTION__))
;
533 // Enable pseudo probe functionalities in SampleProf
534 FunctionSamples::ProfileIsProbeBased = true;
535 const SampleCounter &SC = SampleCounters->begin()->second;
536 // Fill in function body samples
537 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
538 // Fill in boundary sample counts as well as call site samples for calls
539 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
540
541 updateFunctionSamples();
542}
543
544void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
545 const RangeSample &RangeCounter) {
546 ProbeCounterMap ProbeCounter;
547 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
548 // inside extractProbesFromRange.
549 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
550 false);
551
552 for (const auto &PI : ProbeCounter) {
553 const MCDecodedPseudoProbe *Probe = PI.first;
554 uint64_t Count = PI.second;
555 SampleContextFrameVector FrameVec;
556 Binary->getInlineContextForProbe(Probe, FrameVec, true);
557 FunctionSamples &FunctionProfile =
558 getLeafProfileAndAddTotalSamples(FrameVec, Count);
559 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
560 if (Probe->isEntry())
561 FunctionProfile.addHeadSamples(Count);
562 }
563}
564
565void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
566 const BranchSample &BranchCounters) {
567 for (const auto &Entry : BranchCounters) {
568 uint64_t SourceOffset = Entry.first.first;
569 uint64_t TargetOffset = Entry.first.second;
570 uint64_t Count = Entry.second;
571 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 571, __extension__
__PRETTY_FUNCTION__))
;
572
573 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
574 if (CalleeName.size() == 0)
575 continue;
576
577 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
578 const MCDecodedPseudoProbe *CallProbe =
579 Binary->getCallProbeForAddr(SourceAddress);
580 if (CallProbe == nullptr)
581 continue;
582
583 // Record called target sample and its count.
584 SampleContextFrameVector FrameVec;
585 Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
586
587 if (!FrameVec.empty()) {
588 FunctionSamples &FunctionProfile =
589 getLeafProfileAndAddTotalSamples(FrameVec, 0);
590 FunctionProfile.addCalledTargetSamples(
591 FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
592 }
593 }
594}
595
596FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
597 const SampleContextFrameVector &FrameVec, uint64_t Count) {
598 // Get top level profile
599 FunctionSamples *FunctionProfile =
600 &getTopLevelFunctionProfile(FrameVec[0].FuncName);
601 FunctionProfile->addTotalSamples(Count);
602 if (Binary->usePseudoProbes()) {
603 const auto *FuncDesc = Binary->getFuncDescForGUID(
604 Function::getGUID(FunctionProfile->getName()));
605 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
606 }
607
608 for (size_t I = 1; I < FrameVec.size(); I++) {
609 LineLocation Callsite(
610 FrameVec[I - 1].Location.LineOffset,
611 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
612 FunctionSamplesMap &SamplesMap =
613 FunctionProfile->functionSamplesAt(Callsite);
614 auto Ret =
615 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
616 if (Ret.second) {
617 SampleContext Context(FrameVec[I].FuncName);
618 Ret.first->second.setContext(Context);
619 }
620 FunctionProfile = &Ret.first->second;
621 FunctionProfile->addTotalSamples(Count);
622 if (Binary->usePseudoProbes()) {
623 const auto *FuncDesc = Binary->getFuncDescForGUID(
624 Function::getGUID(FunctionProfile->getName()));
625 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
626 }
627 }
628
629 return *FunctionProfile;
630}
631
632RangeSample
633ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
634 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
635 if (FillZeroForAllFuncs) {
636 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
637 for (auto &R : FuncI.second.Ranges) {
638 Ranges[{R.first, R.second - 1}] += 0;
639 }
640 }
641 } else {
642 // For each range, we search for all ranges of the function it belongs to
643 // and initialize it with zero count, so it remains zero if doesn't hit any
644 // samples. This is to be consistent with compiler that interpret zero count
645 // as unexecuted(cold).
646 for (const auto &I : RangeCounter) {
647 uint64_t StartOffset = I.first.first;
648 for (const auto &Range : Binary->getRangesForOffset(StartOffset))
649 Ranges[{Range.first, Range.second - 1}] += 0;
650 }
651 }
652 RangeSample DisjointRanges;
653 findDisjointRanges(DisjointRanges, Ranges);
654 return DisjointRanges;
655}
656
657void ProfileGenerator::populateBodySamplesForAllFunctions(
658 const RangeSample &RangeCounter) {
659 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
660 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
661 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
662 uint64_t Count = Range.second;
663
664 InstructionPointer IP(Binary, RangeBegin, true);
665 // Disjoint ranges may have range in the middle of two instr,
666 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
667 // can be Addr1+1 to Addr2-1. We should ignore such range.
668 if (IP.Address > RangeEnd)
669 continue;
670
671 do {
672 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
673 const SampleContextFrameVector &FrameVec =
674 Binary->getFrameLocationStack(Offset);
675 if (!FrameVec.empty()) {
676 // FIXME: As accumulating total count per instruction caused some
677 // regression, we changed to accumulate total count per byte as a
678 // workaround. Tuning hotness threshold on the compiler side might be
679 // necessary in the future.
680 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
681 FrameVec, Count * Binary->getInstSize(Offset));
682 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
683 Count);
684 }
685 } while (IP.advance() && IP.Address <= RangeEnd);
686 }
687}
688
689StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
690 // Get the function range by branch target if it's a call branch.
691 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
692
693 // We won't accumulate sample count for a range whose start is not the real
694 // function entry such as outlined function or inner labels.
695 if (!FRange || !FRange->IsFuncEntry)
696 return StringRef();
697
698 return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
699}
700
701void ProfileGenerator::populateBoundarySamplesForAllFunctions(
702 const BranchSample &BranchCounters) {
703 for (const auto &Entry : BranchCounters) {
704 uint64_t SourceOffset = Entry.first.first;
705 uint64_t TargetOffset = Entry.first.second;
706 uint64_t Count = Entry.second;
707 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 707, __extension__
__PRETTY_FUNCTION__))
;
708
709 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
710 if (CalleeName.size() == 0)
711 continue;
712 // Record called target sample and its count.
713 const SampleContextFrameVector &FrameVec =
714 Binary->getFrameLocationStack(SourceOffset);
715 if (!FrameVec.empty()) {
716 FunctionSamples &FunctionProfile =
717 getLeafProfileAndAddTotalSamples(FrameVec, 0);
718 FunctionProfile.addCalledTargetSamples(
719 FrameVec.back().Location.LineOffset,
720 getBaseDiscriminator(FrameVec.back().Location.Discriminator),
721 CalleeName, Count);
722 }
723 // Add head samples for callee.
724 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
725 CalleeProfile.addHeadSamples(Count);
726 }
727}
728
729void ProfileGeneratorBase::calculateAndShowDensity(
730 const SampleProfileMap &Profiles) {
731 double Density = calculateDensity(Profiles, HotCountThreshold);
732 showDensitySuggestion(Density);
733}
734
735FunctionSamples *
736CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
737 bool WasLeafInlined) {
738 FunctionSamples *FProfile = ContextNode->getFunctionSamples();
739 if (!FProfile) {
740 FSamplesList.emplace_back();
741 FProfile = &FSamplesList.back();
742 FProfile->setName(ContextNode->getFuncName());
743 ContextNode->setFunctionSamples(FProfile);
744 }
745 // Update ContextWasInlined attribute for existing contexts.
746 // The current function can be called in two ways:
747 // - when processing a probe of the current frame
748 // - when processing the entry probe of an inlinee's frame, which
749 // is then used to update the callsite count of the current frame.
750 // The two can happen in any order, hence here we are making sure
751 // `ContextWasInlined` is always set as expected.
752 // TODO: Note that the former does not always happen if no probes of the
753 // current frame has samples, and if the latter happens, we could lose the
754 // attribute. This should be fixed.
755 if (WasLeafInlined)
756 FProfile->getContext().setAttribute(ContextWasInlined);
757 return FProfile;
758}
759
760ContextTrieNode *
761CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
762 bool WasLeafInlined) {
763 ContextTrieNode *ContextNode =
764 ContextTracker.getOrCreateContextPath(Context, true);
765 getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
766 return ContextNode;
767}
768
769void CSProfileGenerator::generateProfile() {
770 FunctionSamples::ProfileIsCS = true;
771
772 collectProfiledFunctions();
773
774 if (Binary->usePseudoProbes())
1
Assuming the condition is true
2
Taking true branch
775 Binary->decodePseudoProbe();
776
777 if (SampleCounters) {
3
Assuming field 'SampleCounters' is non-null
4
Taking true branch
778 if (Binary->usePseudoProbes()) {
5
Assuming the condition is true
6
Taking true branch
779 generateProbeBasedProfile();
7
Calling 'CSProfileGenerator::generateProbeBasedProfile'
780 } else {
781 generateLineNumBasedProfile();
782 }
783 }
784
785 if (Binary->getTrackFuncContextSize())
786 computeSizeForProfiledFunctions();
787
788 postProcessProfiles();
789}
790
791void CSProfileGenerator::computeSizeForProfiledFunctions() {
792 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
793 for (auto *Func : Binary->getProfiledFunctions())
794 Binary->computeInlinedContextSizeForFunc(Func);
795
796 // Flush the symbolizer to save memory.
797 Binary->flushSymbolizer();
798}
799
800void CSProfileGenerator::updateFunctionSamples() {
801 for (auto *Node : ContextTracker) {
802 FunctionSamples *FSamples = Node->getFunctionSamples();
803 if (FSamples) {
804 if (UpdateTotalSamples)
805 FSamples->updateTotalSamples();
806 FSamples->updateCallsiteSamples();
807 }
808 }
809}
810
811void CSProfileGenerator::generateLineNumBasedProfile() {
812 for (const auto &CI : *SampleCounters) {
813 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
814
815 ContextTrieNode *ContextNode = &getRootContext();
816 // Sample context will be empty if the jump is an external-to-internal call
817 // pattern, the head samples should be added for the internal function.
818 if (!CtxKey->Context.empty()) {
819 // Get or create function profile for the range
820 ContextNode =
821 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
822 // Fill in function body samples
823 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
824 CI.second.RangeCounter);
825 }
826 // Fill in boundary sample counts as well as call site samples for calls
827 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
828 }
829 // Fill in call site value sample for inlined calls and also use context to
830 // infer missing samples. Since we don't have call count for inlined
831 // functions, we estimate it from inlinee's profile using the entry of the
832 // body sample.
833 populateInferredFunctionSamples(getRootContext());
834
835 updateFunctionSamples();
836}
837
838void CSProfileGenerator::populateBodySamplesForFunction(
839 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
840 // Compute disjoint ranges first, so we can use MAX
841 // for calculating count for each location.
842 RangeSample Ranges;
843 findDisjointRanges(Ranges, RangeCounter);
844 for (const auto &Range : Ranges) {
845 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
846 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
847 uint64_t Count = Range.second;
848 // Disjoint ranges have introduce zero-filled gap that
849 // doesn't belong to current context, filter them out.
850 if (Count == 0)
851 continue;
852
853 InstructionPointer IP(Binary, RangeBegin, true);
854 // Disjoint ranges may have range in the middle of two instr,
855 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
856 // can be Addr1+1 to Addr2-1. We should ignore such range.
857 if (IP.Address > RangeEnd)
858 continue;
859
860 do {
861 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
862 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
863 if (LeafLoc) {
864 // Recording body sample for this specific context
865 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
866 FunctionProfile.addTotalSamples(Count);
867 }
868 } while (IP.advance() && IP.Address <= RangeEnd);
869 }
870}
871
872void CSProfileGenerator::populateBoundarySamplesForFunction(
873 ContextTrieNode *Node, const BranchSample &BranchCounters) {
874
875 for (const auto &Entry : BranchCounters) {
876 uint64_t SourceOffset = Entry.first.first;
877 uint64_t TargetOffset = Entry.first.second;
878 uint64_t Count = Entry.second;
879 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 879, __extension__
__PRETTY_FUNCTION__))
;
880
881 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
882 if (CalleeName.size() == 0)
883 continue;
884
885 ContextTrieNode *CallerNode = Node;
886 LineLocation CalleeCallSite(0, 0);
887 if (CallerNode != &getRootContext()) {
888 // Record called target sample and its count
889 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
890 if (LeafLoc) {
891 CallerNode->getFunctionSamples()->addCalledTargetSamples(
892 LeafLoc->Location.LineOffset,
893 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
894 Count);
895 // Record head sample for called target(callee)
896 CalleeCallSite = LeafLoc->Location;
897 }
898 }
899
900 ContextTrieNode *CalleeNode =
901 CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
902 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
903 CalleeProfile->addHeadSamples(Count);
904 }
905}
906
907void CSProfileGenerator::populateInferredFunctionSamples(
908 ContextTrieNode &Node) {
909 // There is no call jmp sample between the inliner and inlinee, we need to use
910 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
911 // sample depends on child(inlinee)'s sample, so traverse the tree in
912 // post-order.
913 for (auto &It : Node.getAllChildContext())
914 populateInferredFunctionSamples(It.second);
915
916 FunctionSamples *CalleeProfile = Node.getFunctionSamples();
917 if (!CalleeProfile)
918 return;
919 // If we already have head sample counts, we must have value profile
920 // for call sites added already. Skip to avoid double counting.
921 if (CalleeProfile->getHeadSamples())
922 return;
923 ContextTrieNode *CallerNode = Node.getParentContext();
924 // If we don't have context, nothing to do for caller's call site.
925 // This could happen for entry point function.
926 if (CallerNode == &getRootContext())
927 return;
928
929 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
930 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
931 // Since we don't have call count for inlined functions, we
932 // estimate it from inlinee's profile using entry body sample.
933 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
934 // If we don't have samples with location, use 1 to indicate live.
935 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
936 EstimatedCallCount = 1;
937 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
938 CallerLeafFrameLoc.Discriminator,
939 Node.getFuncName(), EstimatedCallCount);
940 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
941 CallerLeafFrameLoc.Discriminator,
942 EstimatedCallCount);
943 CallerProfile.addTotalSamples(EstimatedCallCount);
944}
945
946void CSProfileGenerator::convertToProfileMap(
947 ContextTrieNode &Node, SampleContextFrameVector &Context) {
948 FunctionSamples *FProfile = Node.getFunctionSamples();
949 if (FProfile) {
950 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
951 // Save the new context for future references.
952 SampleContextFrames NewContext = *Contexts.insert(Context).first;
953 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
954 FunctionSamples &NewProfile = Ret.first->second;
955 NewProfile.getContext().setContext(NewContext);
956 Context.pop_back();
957 }
958
959 for (auto &It : Node.getAllChildContext()) {
960 ContextTrieNode &ChildNode = It.second;
961 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
962 convertToProfileMap(ChildNode, Context);
963 Context.pop_back();
964 }
965}
966
967void CSProfileGenerator::convertToProfileMap() {
968 assert(ProfileMap.empty() &&(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 969, __extension__
__PRETTY_FUNCTION__))
969 "ProfileMap should be empty before converting from the trie")(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie"
) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 969, __extension__
__PRETTY_FUNCTION__))
;
970 assert(IsProfileValidOnTrie &&(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 971, __extension__
__PRETTY_FUNCTION__))
971 "Do not convert the trie twice, it's already destroyed")(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed"
) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 971, __extension__
__PRETTY_FUNCTION__))
;
972
973 SampleContextFrameVector Context;
974 for (auto &It : getRootContext().getAllChildContext())
975 convertToProfileMap(It.second, Context);
976
977 IsProfileValidOnTrie = false;
978}
979
980void CSProfileGenerator::postProcessProfiles() {
981 // Compute hot/cold threshold based on profile. This will be used for cold
982 // context profile merging/trimming.
983 computeSummaryAndThreshold();
984
985 // Run global pre-inliner to adjust/merge context profile based on estimated
986 // inline decisions.
987 if (EnableCSPreInliner) {
988 ContextTracker.populateFuncToCtxtMap();
989 CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
990 // Turn off the profile merger by default unless it is explicitly enabled.
991 if (!CSProfMergeColdContext.getNumOccurrences())
992 CSProfMergeColdContext = false;
993 }
994
995 convertToProfileMap();
996
997 // Trim and merge cold context profile using cold threshold above.
998 if (TrimColdProfile || CSProfMergeColdContext) {
999 SampleContextTrimmer(ProfileMap)
1000 .trimAndMergeColdContextProfiles(
1001 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1002 CSProfMaxColdContextDepth, EnableCSPreInliner);
1003 }
1004
1005 // Merge function samples of CS profile to calculate profile density.
1006 sampleprof::SampleProfileMap ContextLessProfiles;
1007 for (const auto &I : ProfileMap) {
1008 ContextLessProfiles[I.second.getName()].merge(I.second);
1009 }
1010
1011 calculateAndShowDensity(ContextLessProfiles);
1012 if (GenCSNestedProfile) {
1013 CSProfileConverter CSConverter(ProfileMap);
1014 CSConverter.convertProfiles();
1015 FunctionSamples::ProfileIsCS = false;
1016 }
1017}
1018
1019void ProfileGeneratorBase::computeSummaryAndThreshold(
1020 SampleProfileMap &Profiles) {
1021 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1022 Summary = Builder.computeSummaryForProfiles(Profiles);
1023 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1024 (Summary->getDetailedSummary()));
1025 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1026 (Summary->getDetailedSummary()));
1027}
1028
1029void CSProfileGenerator::computeSummaryAndThreshold() {
1030 // Always merge and use context-less profile map to compute summary.
1031 SampleProfileMap ContextLessProfiles;
1032 ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1033
1034 // Set the flag below to avoid merging the profile again in
1035 // computeSummaryAndThreshold
1036 FunctionSamples::ProfileIsCS = false;
1037 assert((static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1040, __extension__
__PRETTY_FUNCTION__))
1038 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1040, __extension__
__PRETTY_FUNCTION__))
1039 "Don't set --profile-summary-contextless to false for profile "(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1040, __extension__
__PRETTY_FUNCTION__))
1040 "generation")(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences
() || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile "
"generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1040, __extension__
__PRETTY_FUNCTION__))
;
1041 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1042 // Recover the old value.
1043 FunctionSamples::ProfileIsCS = true;
1044}
1045
1046void ProfileGeneratorBase::extractProbesFromRange(
1047 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1048 bool FindDisjointRanges) {
1049 const RangeSample *PRanges = &RangeCounter;
1050 RangeSample Ranges;
1051 if (FindDisjointRanges) {
1052 findDisjointRanges(Ranges, RangeCounter);
1053 PRanges = &Ranges;
1054 }
1055
1056 for (const auto &Range : *PRanges) {
1057 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
1058 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
1059 uint64_t Count = Range.second;
1060
1061 InstructionPointer IP(Binary, RangeBegin, true);
1062 // Disjoint ranges may have range in the middle of two instr,
1063 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1064 // can be Addr1+1 to Addr2-1. We should ignore such range.
1065 if (IP.Address > RangeEnd)
1066 continue;
1067
1068 do {
1069 const AddressProbesMap &Address2ProbesMap =
1070 Binary->getAddress2ProbesMap();
1071 auto It = Address2ProbesMap.find(IP.Address);
1072 if (It != Address2ProbesMap.end()) {
1073 for (const auto &Probe : It->second) {
1074 ProbeCounter[&Probe] += Count;
1075 }
1076 }
1077 } while (IP.advance() && IP.Address <= RangeEnd);
1078 }
1079}
1080
1081static void
1082extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1083 const SmallVectorImpl<uint64_t> &Addresses,
1084 ProfiledBinary *Binary) {
1085 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1086 for (auto Addr : reverse(Addresses)) {
1087 const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
1088 // These could be the cases when a probe is not found at a calliste. Cutting
1089 // off the context from here since the inliner will not know how to consume
1090 // a context with unknown callsites.
1091 // 1. for functions that are not sampled when
1092 // --decode-probe-for-profiled-functions-only is on.
1093 // 2. for a merged callsite. Callsite merging may cause the loss of original
1094 // probe IDs.
1095 // 3. for an external callsite.
1096 if (!CallProbe)
1097 break;
1098 Probes.push_back(CallProbe);
1099 }
1100
1101 std::reverse(Probes.begin(), Probes.end());
1102
1103 // Extract context stack for reusing, leaf context stack will be added
1104 // compressed while looking up function profile.
1105 for (const auto *P : Probes) {
1106 Binary->getInlineContextForProbe(P, ContextStack, true);
1107 }
1108}
1109
1110void CSProfileGenerator::generateProbeBasedProfile() {
1111 // Enable pseudo probe functionalities in SampleProf
1112 FunctionSamples::ProfileIsProbeBased = true;
1113 for (const auto &CI : *SampleCounters) {
1114 const AddrBasedCtxKey *CtxKey =
9
'CtxKey' initialized to a null pointer value
1115 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
8
Assuming the object is not a 'CastReturnType'
1116 SampleContextFrameVector ContextStack;
1117 extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
10
Forming reference to null pointer
1118 // Fill in function body samples from probes, also infer caller's samples
1119 // from callee's probe
1120 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1121 // Fill in boundary samples for a call probe
1122 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1123 }
1124}
1125
1126void CSProfileGenerator::populateBodySamplesWithProbes(
1127 const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1128 ProbeCounterMap ProbeCounter;
1129 // Extract the top frame probes by looking up each address among the range in
1130 // the Address2ProbeMap
1131 extractProbesFromRange(RangeCounter, ProbeCounter);
1132 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1133 std::unordered_set<FunctionSamples *>>
1134 FrameSamples;
1135 for (const auto &PI : ProbeCounter) {
1136 const MCDecodedPseudoProbe *Probe = PI.first;
1137 uint64_t Count = PI.second;
1138 // Disjoint ranges have introduce zero-filled gap that
1139 // doesn't belong to current context, filter them out.
1140 if (!Probe->isBlock() || Count == 0)
1141 continue;
1142
1143 ContextTrieNode *ContextNode =
1144 getContextNodeForLeafProbe(ContextStack, Probe);
1145 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1146 // Record the current frame and FunctionProfile whenever samples are
1147 // collected for non-danglie probes. This is for reporting all of the
1148 // zero count probes of the frame later.
1149 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1150 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1151 FunctionProfile.addTotalSamples(Count);
1152 if (Probe->isEntry()) {
1153 FunctionProfile.addHeadSamples(Count);
1154 // Look up for the caller's function profile
1155 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1156 ContextTrieNode *CallerNode = ContextNode->getParentContext();
1157 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1158 // Since the context id will be compressed, we have to use callee's
1159 // context id to infer caller's context id to ensure they share the
1160 // same context prefix.
1161 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1162 assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1163, __extension__
__PRETTY_FUNCTION__))
1163 "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1163, __extension__
__PRETTY_FUNCTION__))
;
1164 FunctionSamples &CallerProfile =
1165 *getOrCreateFunctionSamples(CallerNode);
1166 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1167 CallerProfile.addBodySamples(CallerIndex, 0, Count);
1168 CallerProfile.addTotalSamples(Count);
1169 CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1170 ContextNode->getFuncName(), Count);
1171 }
1172 }
1173 }
1174
1175 // Assign zero count for remaining probes without sample hits to
1176 // differentiate from probes optimized away, of which the counts are unknown
1177 // and will be inferred by the compiler.
1178 for (auto &I : FrameSamples) {
1179 for (auto *FunctionProfile : I.second) {
1180 for (auto *Probe : I.first->getProbes()) {
1181 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1182 }
1183 }
1184 }
1185}
1186
1187void CSProfileGenerator::populateBoundarySamplesWithProbes(
1188 const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1189 for (const auto &BI : BranchCounter) {
1190 uint64_t SourceOffset = BI.first.first;
1191 uint64_t TargetOffset = BI.first.second;
1192 uint64_t Count = BI.second;
1193 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
1194 const MCDecodedPseudoProbe *CallProbe =
1195 Binary->getCallProbeForAddr(SourceAddress);
1196 if (CallProbe == nullptr)
1197 continue;
1198 FunctionSamples &FunctionProfile =
1199 getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1200 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1201 FunctionProfile.addTotalSamples(Count);
1202 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
1203 if (CalleeName.size() == 0)
1204 continue;
1205 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1206 Count);
1207 }
1208}
1209
1210ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1211 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1212
1213 // Explicitly copy the context for appending the leaf context
1214 SampleContextFrameVector NewContextStack(ContextStack.begin(),
1215 ContextStack.end());
1216 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1217 // For leaf inlined context with the top frame, we should strip off the top
1218 // frame's probe id, like:
1219 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1220 auto LeafFrame = NewContextStack.back();
1221 LeafFrame.Location = LineLocation(0, 0);
1222 NewContextStack.pop_back();
1223 // Compress the context string except for the leaf frame
1224 CSProfileGenerator::compressRecursionContext(NewContextStack);
1225 CSProfileGenerator::trimContext(NewContextStack);
1226 NewContextStack.push_back(LeafFrame);
1227
1228 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1229 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1230 ContextTrieNode *ContextNode =
1231 getOrCreateContextNode(NewContextStack, WasLeafInlined);
1232 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1233 return ContextNode;
1234}
1235
1236FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1237 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1238 return *getContextNodeForLeafProbe(ContextStack, LeafProbe)
1239 ->getFunctionSamples();
1240}
1241
1242} // end namespace sampleprof
1243} // end namespace llvm