Bug Summary

File:llvm/tools/llvm-profgen/ProfileGenerator.cpp
Warning:line 826, column 5
Forming reference to null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ProfileGenerator.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/llvm-profgen -I /build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/llvm/tools/llvm-profgen -I include -I /build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-31-132209-101875-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220131111436+ae68b3a45776/llvm/tools/llvm-profgen/ProfileGenerator.cpp
1//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ProfileGenerator.h"
10#include "ErrorHandling.h"
11#include "ProfiledBinary.h"
12#include "llvm/ProfileData/ProfileCommon.h"
13#include <float.h>
14#include <unordered_set>
15
16cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
17 cl::Required,
18 cl::desc("Output profile file"));
19static cl::alias OutputA("o", cl::desc("Alias for --output"),
20 cl::aliasopt(OutputFilename));
21
22static cl::opt<SampleProfileFormat> OutputFormat(
23 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
24 cl::values(
25 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)"
}
,
26 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding")llvm::cl::OptionEnumValue { "compbinary", int(SPF_Compact_Binary
), "Compact binary encoding" }
,
27 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary),
"Extensible binary encoding" }
,
28 clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding"
}
,
29 clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
30 "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)"
}
));
31
32cl::opt<bool> UseMD5(
33 "use-md5", cl::init(false), cl::Hidden,
34 cl::desc("Use md5 to represent function names in the output profile (only "
35 "meaningful for -extbinary)"));
36
37static cl::opt<bool> PopulateProfileSymbolList(
38 "populate-profile-symbol-list", cl::init(false), cl::Hidden,
39 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
40
41static cl::opt<bool> FillZeroForAllFuncs(
42 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
43 cl::desc("Attribute all functions' range with zero count "
44 "even it's not hit by any samples."));
45
46static cl::opt<int32_t, true> RecursionCompression(
47 "compress-recursion",
48 cl::desc("Compressing recursion by deduplicating adjacent frame "
49 "sequences up to the specified size. -1 means no size limit."),
50 cl::Hidden,
51 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
52
53static cl::opt<bool>
54 TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
55 cl::desc("If the total count of the profile is smaller "
56 "than threshold, it will be trimmed."));
57
58static cl::opt<bool> CSProfMergeColdContext(
59 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
60 cl::desc("If the total count of context profile is smaller than "
61 "the threshold, it will be merged into context-less base "
62 "profile."));
63
64static cl::opt<uint32_t> CSProfMaxColdContextDepth(
65 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
66 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
67 "context-less base profile"));
68
69static cl::opt<int, true> CSProfMaxContextDepth(
70 "csprof-max-context-depth", cl::ZeroOrMore,
71 cl::desc("Keep the last K contexts while merging profile. -1 means no "
72 "depth limit."),
73 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
74
75static cl::opt<double> HotFunctionDensityThreshold(
76 "hot-function-density-threshold", llvm::cl::init(1000),
77 llvm::cl::desc(
78 "specify density threshold for hot functions (default: 1000)"),
79 llvm::cl::Optional);
80static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
81 llvm::cl::desc("show profile density details"),
82 llvm::cl::Optional);
83
84static cl::opt<bool> UpdateTotalSamples(
85 "update-total-samples", llvm::cl::init(false),
86 llvm::cl::desc(
87 "Update total samples by accumulating all its body samples."),
88 llvm::cl::Optional);
89
90extern cl::opt<int> ProfileSummaryCutoffHot;
91
92static cl::opt<bool> GenCSNestedProfile(
93 "gen-cs-nested-profile", cl::Hidden, cl::init(false),
94 cl::desc("Generate nested function profiles for CSSPGO"));
95
96using namespace llvm;
97using namespace sampleprof;
98
99namespace llvm {
100namespace sampleprof {
101
102// Initialize the MaxCompressionSize to -1 which means no size limit
103int32_t CSProfileGenerator::MaxCompressionSize = -1;
104
105int CSProfileGenerator::MaxContextDepth = -1;
106
107bool ProfileGeneratorBase::UseFSDiscriminator = false;
108
109std::unique_ptr<ProfileGeneratorBase>
110ProfileGeneratorBase::create(ProfiledBinary *Binary,
111 const ContextSampleCounterMap &SampleCounters,
112 bool ProfileIsCSFlat) {
113 std::unique_ptr<ProfileGeneratorBase> Generator;
114 if (ProfileIsCSFlat) {
115 if (Binary->useFSDiscriminator())
116 exitWithError("FS discriminator is not supported in CS profile.");
117 Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
118 } else {
119 Generator.reset(new ProfileGenerator(Binary, SampleCounters));
120 }
121 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
122 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
123
124 return Generator;
125}
126
127void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
128 SampleProfileMap &ProfileMap) {
129 // Populate profile symbol list if extended binary format is used.
130 ProfileSymbolList SymbolList;
131
132 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
133 Binary->populateSymbolListFromDWARF(SymbolList);
134 Writer->setProfileSymbolList(&SymbolList);
135 }
136
137 if (std::error_code EC = Writer->write(ProfileMap))
138 exitWithError(std::move(EC));
139}
140
141void ProfileGeneratorBase::write() {
142 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
143 if (std::error_code EC = WriterOrErr.getError())
144 exitWithError(EC, OutputFilename);
145
146 if (UseMD5) {
147 if (OutputFormat != SPF_Ext_Binary)
148 WithColor::warning() << "-use-md5 is ignored. Specify "
149 "--format=extbinary to enable it\n";
150 else
151 WriterOrErr.get()->setUseMD5();
152 }
153
154 write(std::move(WriterOrErr.get()), ProfileMap);
155}
156
157void ProfileGeneratorBase::showDensitySuggestion(double Density) {
158 if (Density == 0.0)
159 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
160 "set too low. Please check your command.\n";
161 else if (Density < HotFunctionDensityThreshold)
162 WithColor::warning()
163 << "AutoFDO is estimated to optimize better with "
164 << format("%.1f", HotFunctionDensityThreshold / Density)
165 << "x more samples. Please consider increasing sampling rate or "
166 "profiling for longer duration to get more samples.\n";
167
168 if (ShowDensity)
169 outs() << "Minimum profile density for hot functions with top "
170 << format("%.2f",
171 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
172 10000)
173 << "% total samples: " << format("%.1f", Density) << "\n";
174}
175
176double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
177 uint64_t HotCntThreshold) {
178 double Density = DBL_MAX1.7976931348623157e+308;
179 std::vector<const FunctionSamples *> HotFuncs;
180 for (auto &I : Profiles) {
181 auto &FuncSamples = I.second;
182 if (FuncSamples.getTotalSamples() < HotCntThreshold)
183 continue;
184 HotFuncs.emplace_back(&FuncSamples);
185 }
186
187 for (auto *FuncSamples : HotFuncs) {
188 auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
189 if (!Func)
190 continue;
191 uint64_t FuncSize = Func->getFuncSize();
192 if (FuncSize == 0)
193 continue;
194 Density =
195 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
196 FuncSize);
197 }
198
199 return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density;
200}
201
202void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
203 const RangeSample &Ranges) {
204
205 /*
206 Regions may overlap with each other. Using the boundary info, find all
207 disjoint ranges and their sample count. BoundaryPoint contains the count
208 multiple samples begin/end at this points.
209
210 |<--100-->| Sample1
211 |<------200------>| Sample2
212 A B C
213
214 In the example above,
215 Sample1 begins at A, ends at B, its value is 100.
216 Sample2 beings at A, ends at C, its value is 200.
217 For A, BeginCount is the sum of sample begins at A, which is 300 and no
218 samples ends at A, so EndCount is 0.
219 Then boundary points A, B, and C with begin/end counts are:
220 A: (300, 0)
221 B: (0, 100)
222 C: (0, 200)
223 */
224 struct BoundaryPoint {
225 // Sum of sample counts beginning at this point
226 uint64_t BeginCount = UINT64_MAX(18446744073709551615UL);
227 // Sum of sample counts ending at this point
228 uint64_t EndCount = UINT64_MAX(18446744073709551615UL);
229 // Is the begin point of a zero range.
230 bool IsZeroRangeBegin = false;
231 // Is the end point of a zero range.
232 bool IsZeroRangeEnd = false;
233
234 void addBeginCount(uint64_t Count) {
235 if (BeginCount == UINT64_MAX(18446744073709551615UL))
236 BeginCount = 0;
237 BeginCount += Count;
238 }
239
240 void addEndCount(uint64_t Count) {
241 if (EndCount == UINT64_MAX(18446744073709551615UL))
242 EndCount = 0;
243 EndCount += Count;
244 }
245 };
246
247 /*
248 For the above example. With boundary points, follwing logic finds two
249 disjoint region of
250
251 [A,B]: 300
252 [B+1,C]: 200
253
254 If there is a boundary point that both begin and end, the point itself
255 becomes a separate disjoint region. For example, if we have original
256 ranges of
257
258 |<--- 100 --->|
259 |<--- 200 --->|
260 A B C
261
262 there are three boundary points with their begin/end counts of
263
264 A: (100, 0)
265 B: (200, 100)
266 C: (0, 200)
267
268 the disjoint ranges would be
269
270 [A, B-1]: 100
271 [B, B]: 300
272 [B+1, C]: 200.
273
274 Example for zero value range:
275
276 |<--- 100 --->|
277 |<--- 200 --->|
278 |<--------------- 0 ----------------->|
279 A B C D E F
280
281 [A, B-1] : 0
282 [B, C] : 100
283 [C+1, D-1]: 0
284 [D, E] : 200
285 [E+1, F] : 0
286 */
287 std::map<uint64_t, BoundaryPoint> Boundaries;
288
289 for (const auto &Item : Ranges) {
290 assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 291, __extension__
__PRETTY_FUNCTION__))
291 "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first.
second && "Invalid instruction range") ? void (0) : __assert_fail
("Item.first.first <= Item.first.second && \"Invalid instruction range\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 291, __extension__
__PRETTY_FUNCTION__))
;
292 auto &BeginPoint = Boundaries[Item.first.first];
293 auto &EndPoint = Boundaries[Item.first.second];
294 uint64_t Count = Item.second;
295
296 BeginPoint.addBeginCount(Count);
297 EndPoint.addEndCount(Count);
298 if (Count == 0) {
299 BeginPoint.IsZeroRangeBegin = true;
300 EndPoint.IsZeroRangeEnd = true;
301 }
302 }
303
304 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
305 // and the next valid address
306 uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL);
307 int ZeroRangeDepth = 0;
308 uint64_t Count = 0;
309 for (const auto &Item : Boundaries) {
310 uint64_t Address = Item.first;
311 const BoundaryPoint &Point = Item.second;
312 if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) {
313 if (BeginAddress != UINT64_MAX(18446744073709551615UL))
314 DisjointRanges[{BeginAddress, Address - 1}] = Count;
315 Count += Point.BeginCount;
316 BeginAddress = Address;
317 ZeroRangeDepth += Point.IsZeroRangeBegin;
318 }
319 if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) {
320 assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 321, __extension__
__PRETTY_FUNCTION__))
321 "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL
)) && "First boundary point cannot be 'end' point") ?
void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 321, __extension__
__PRETTY_FUNCTION__))
;
322 DisjointRanges[{BeginAddress, Address}] = Count;
323 assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount &&
"Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 323, __extension__
__PRETTY_FUNCTION__))
;
324 Count -= Point.EndCount;
325 BeginAddress = Address + 1;
326 ZeroRangeDepth -= Point.IsZeroRangeEnd;
327 // If the remaining count is zero and it's no longer in a zero range, this
328 // means we consume all the ranges before, thus mark BeginAddress as
329 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
330 // [<---- 10 ---->]
331 // [<---- 20 ---->]
332 // A B C D
333 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
334 // have the [B+1, C-1] zero range.
335 if (Count == 0 && ZeroRangeDepth == 0)
336 BeginAddress = UINT64_MAX(18446744073709551615UL);
337 }
338 }
339}
340
341void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
342 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
343 uint64_t Count) {
344 // Use the maximum count of samples with same line location
345 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
346
347 // Use duplication factor to compensated for loop unroll/vectorization.
348 // Note that this is only needed when we're taking MAX of the counts at
349 // the location instead of SUM.
350 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
351
352 ErrorOr<uint64_t> R =
353 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
354
355 uint64_t PreviousCount = R ? R.get() : 0;
356 if (PreviousCount <= Count) {
357 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
358 Count - PreviousCount);
359 }
360}
361
362void ProfileGeneratorBase::updateTotalSamples() {
363 if (!UpdateTotalSamples)
364 return;
365
366 for (auto &Item : ProfileMap) {
367 FunctionSamples &FunctionProfile = Item.second;
368 FunctionProfile.updateTotalSamples();
369 }
370}
371
372FunctionSamples &
373ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
374 SampleContext Context(FuncName);
375 auto Ret = ProfileMap.emplace(Context, FunctionSamples());
376 if (Ret.second) {
377 FunctionSamples &FProfile = Ret.first->second;
378 FProfile.setContext(Context);
379 }
380 return Ret.first->second;
381}
382
383void ProfileGenerator::generateProfile() {
384 if (Binary->usePseudoProbes()) {
385 // TODO: Support probe based profile generation
386 exitWithError("Probe based profile generation not supported for AutoFDO, "
387 "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
388 } else {
389 generateLineNumBasedProfile();
390 }
391 postProcessProfiles();
392}
393
394void ProfileGenerator::postProcessProfiles() {
395 computeSummaryAndThreshold();
396 trimColdProfiles(ProfileMap, ColdCountThreshold);
397 calculateAndShowDensity(ProfileMap);
398}
399
400void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
401 uint64_t ColdCntThreshold) {
402 if (!TrimColdProfile)
403 return;
404
405 // Move cold profiles into a tmp container.
406 std::vector<SampleContext> ColdProfiles;
407 for (const auto &I : ProfileMap) {
408 if (I.second.getTotalSamples() < ColdCntThreshold)
409 ColdProfiles.emplace_back(I.first);
410 }
411
412 // Remove the cold profile from ProfileMap.
413 for (const auto &I : ColdProfiles)
414 ProfileMap.erase(I);
415}
416
417void ProfileGenerator::generateLineNumBasedProfile() {
418 assert(SampleCounters.size() == 1 &&(static_cast <bool> (SampleCounters.size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters.size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 419, __extension__
__PRETTY_FUNCTION__))
419 "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters.size() == 1 &&
"Must have one entry for profile generation.") ? void (0) : __assert_fail
("SampleCounters.size() == 1 && \"Must have one entry for profile generation.\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 419, __extension__
__PRETTY_FUNCTION__))
;
420 const SampleCounter &SC = SampleCounters.begin()->second;
421 // Fill in function body samples
422 populateBodySamplesForAllFunctions(SC.RangeCounter);
423 // Fill in boundary sample counts as well as call site samples for calls
424 populateBoundarySamplesForAllFunctions(SC.BranchCounter);
425
426 updateTotalSamples();
427}
428
429FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
430 const SampleContextFrameVector &FrameVec, uint64_t Count) {
431 // Get top level profile
432 FunctionSamples *FunctionProfile =
433 &getTopLevelFunctionProfile(FrameVec[0].FuncName);
434 FunctionProfile->addTotalSamples(Count);
435
436 for (size_t I = 1; I < FrameVec.size(); I++) {
437 LineLocation Callsite(
438 FrameVec[I - 1].Location.LineOffset,
439 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
440 FunctionSamplesMap &SamplesMap =
441 FunctionProfile->functionSamplesAt(Callsite);
442 auto Ret =
443 SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
444 if (Ret.second) {
445 SampleContext Context(FrameVec[I].FuncName);
446 Ret.first->second.setContext(Context);
447 }
448 FunctionProfile = &Ret.first->second;
449 FunctionProfile->addTotalSamples(Count);
450 }
451
452 return *FunctionProfile;
453}
454
455RangeSample
456ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
457 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
458 if (FillZeroForAllFuncs) {
459 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
460 for (auto &R : FuncI.second.Ranges) {
461 Ranges[{R.first, R.second - 1}] += 0;
462 }
463 }
464 } else {
465 // For each range, we search for all ranges of the function it belongs to
466 // and initialize it with zero count, so it remains zero if doesn't hit any
467 // samples. This is to be consistent with compiler that interpret zero count
468 // as unexecuted(cold).
469 for (const auto &I : RangeCounter) {
470 uint64_t StartOffset = I.first.first;
471 for (const auto &Range : Binary->getRangesForOffset(StartOffset))
472 Ranges[{Range.first, Range.second - 1}] += 0;
473 }
474 }
475 RangeSample DisjointRanges;
476 findDisjointRanges(DisjointRanges, Ranges);
477 return DisjointRanges;
478}
479
480void ProfileGenerator::populateBodySamplesForAllFunctions(
481 const RangeSample &RangeCounter) {
482 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
483 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
484 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
485 uint64_t Count = Range.second;
486
487 InstructionPointer IP(Binary, RangeBegin, true);
488 // Disjoint ranges may have range in the middle of two instr,
489 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
490 // can be Addr1+1 to Addr2-1. We should ignore such range.
491 if (IP.Address > RangeEnd)
492 continue;
493
494 do {
495 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
496 const SampleContextFrameVector &FrameVec =
497 Binary->getFrameLocationStack(Offset);
498 if (!FrameVec.empty()) {
499 // FIXME: As accumulating total count per instruction caused some
500 // regression, we changed to accumulate total count per byte as a
501 // workaround. Tuning hotness threshold on the compiler side might be
502 // necessary in the future.
503 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
504 FrameVec, Count * Binary->getInstSize(Offset));
505 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
506 Count);
507 }
508 } while (IP.advance() && IP.Address <= RangeEnd);
509 }
510}
511
512StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
513 // Get the function range by branch target if it's a call branch.
514 auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
515
516 // We won't accumulate sample count for a range whose start is not the real
517 // function entry such as outlined function or inner labels.
518 if (!FRange || !FRange->IsFuncEntry)
519 return StringRef();
520
521 return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
522}
523
524void ProfileGenerator::populateBoundarySamplesForAllFunctions(
525 const BranchSample &BranchCounters) {
526 for (const auto &Entry : BranchCounters) {
527 uint64_t SourceOffset = Entry.first.first;
528 uint64_t TargetOffset = Entry.first.second;
529 uint64_t Count = Entry.second;
530 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 530, __extension__
__PRETTY_FUNCTION__))
;
531
532 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
533 if (CalleeName.size() == 0)
534 continue;
535 // Record called target sample and its count.
536 const SampleContextFrameVector &FrameVec =
537 Binary->getFrameLocationStack(SourceOffset);
538 if (!FrameVec.empty()) {
539 FunctionSamples &FunctionProfile =
540 getLeafProfileAndAddTotalSamples(FrameVec, 0);
541 FunctionProfile.addCalledTargetSamples(
542 FrameVec.back().Location.LineOffset,
543 getBaseDiscriminator(FrameVec.back().Location.Discriminator),
544 CalleeName, Count);
545 }
546 // Add head samples for callee.
547 FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
548 CalleeProfile.addHeadSamples(Count);
549 }
550}
551
552void ProfileGeneratorBase::calculateAndShowDensity(
553 const SampleProfileMap &Profiles) {
554 double Density = calculateDensity(Profiles, HotCountThreshold);
555 showDensitySuggestion(Density);
556}
557
558FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
559 const SampleContextFrameVector &Context, bool WasLeafInlined) {
560 auto I = ProfileMap.find(SampleContext(Context));
561 if (I == ProfileMap.end()) {
562 // Save the new context for future references.
563 SampleContextFrames NewContext = *Contexts.insert(Context).first;
564 SampleContext FContext(NewContext, RawContext);
565 auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
566 if (WasLeafInlined)
567 FContext.setAttribute(ContextWasInlined);
568 FunctionSamples &FProfile = Ret.first->second;
569 FProfile.setContext(FContext);
570 return Ret.first->second;
571 }
572 return I->second;
573}
574
575void CSProfileGenerator::generateProfile() {
576 FunctionSamples::ProfileIsCSFlat = true;
577
578 if (Binary->getTrackFuncContextSize())
1
Assuming the condition is false
2
Taking false branch
579 computeSizeForProfiledFunctions();
580
581 if (Binary->usePseudoProbes()) {
3
Assuming the condition is true
4
Taking true branch
582 // Enable pseudo probe functionalities in SampleProf
583 FunctionSamples::ProfileIsProbeBased = true;
584 generateProbeBasedProfile();
5
Calling 'CSProfileGenerator::generateProbeBasedProfile'
585 } else {
586 generateLineNumBasedProfile();
587 }
588 postProcessProfiles();
589}
590
591void CSProfileGenerator::computeSizeForProfiledFunctions() {
592 // Hash map to deduplicate the function range and the item is a pair of
593 // function start and end offset.
594 std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
595 // Go through all the ranges in the CS counters, use the start of the range to
596 // look up the function it belongs and record the function range.
597 for (const auto &CI : SampleCounters) {
598 for (const auto &Item : CI.second.RangeCounter) {
599 // FIXME: Filter the bogus crossing function range.
600 uint64_t StartOffset = Item.first.first;
601 // Note that a function can be spilt into multiple ranges, so get all
602 // ranges of the function.
603 for (const auto &Range : Binary->getRangesForOffset(StartOffset))
604 AggregatedRanges[Range.first] = Range.second;
605 }
606 }
607
608 for (const auto &I : AggregatedRanges) {
609 uint64_t StartOffset = I.first;
610 uint64_t EndOffset = I.second;
611 Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
612 }
613}
614
615void CSProfileGenerator::generateLineNumBasedProfile() {
616 for (const auto &CI : SampleCounters) {
617 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
618
619 // Get or create function profile for the range
620 FunctionSamples &FunctionProfile =
621 getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
622
623 // Fill in function body samples
624 populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
625 // Fill in boundary sample counts as well as call site samples for calls
626 populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
627 CI.second.BranchCounter);
628 }
629 // Fill in call site value sample for inlined calls and also use context to
630 // infer missing samples. Since we don't have call count for inlined
631 // functions, we estimate it from inlinee's profile using the entry of the
632 // body sample.
633 populateInferredFunctionSamples();
634
635 updateTotalSamples();
636}
637
638void CSProfileGenerator::populateBodySamplesForFunction(
639 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
640 // Compute disjoint ranges first, so we can use MAX
641 // for calculating count for each location.
642 RangeSample Ranges;
643 findDisjointRanges(Ranges, RangeCounter);
644 for (const auto &Range : Ranges) {
645 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
646 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
647 uint64_t Count = Range.second;
648 // Disjoint ranges have introduce zero-filled gap that
649 // doesn't belong to current context, filter them out.
650 if (Count == 0)
651 continue;
652
653 InstructionPointer IP(Binary, RangeBegin, true);
654 // Disjoint ranges may have range in the middle of two instr,
655 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
656 // can be Addr1+1 to Addr2-1. We should ignore such range.
657 if (IP.Address > RangeEnd)
658 continue;
659
660 do {
661 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
662 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
663 if (LeafLoc.hasValue()) {
664 // Recording body sample for this specific context
665 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
666 FunctionProfile.addTotalSamples(Count);
667 }
668 } while (IP.advance() && IP.Address <= RangeEnd);
669 }
670}
671
672void CSProfileGenerator::populateBoundarySamplesForFunction(
673 SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
674 const BranchSample &BranchCounters) {
675
676 for (const auto &Entry : BranchCounters) {
677 uint64_t SourceOffset = Entry.first.first;
678 uint64_t TargetOffset = Entry.first.second;
679 uint64_t Count = Entry.second;
680 assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch"
) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 680, __extension__
__PRETTY_FUNCTION__))
;
681
682 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
683 if (CalleeName.size() == 0)
684 continue;
685
686 // Record called target sample and its count
687 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
688 if (!LeafLoc.hasValue())
689 continue;
690 FunctionProfile.addCalledTargetSamples(
691 LeafLoc->Location.LineOffset,
692 getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
693 Count);
694
695 // Record head sample for called target(callee)
696 SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
697 assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&(static_cast <bool> (CalleeCtx.back().FuncName == LeafLoc
->FuncName && "Leaf function name doesn't match") ?
void (0) : __assert_fail ("CalleeCtx.back().FuncName == LeafLoc->FuncName && \"Leaf function name doesn't match\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 698, __extension__
__PRETTY_FUNCTION__))
698 "Leaf function name doesn't match")(static_cast <bool> (CalleeCtx.back().FuncName == LeafLoc
->FuncName && "Leaf function name doesn't match") ?
void (0) : __assert_fail ("CalleeCtx.back().FuncName == LeafLoc->FuncName && \"Leaf function name doesn't match\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 698, __extension__
__PRETTY_FUNCTION__))
;
699 CalleeCtx.back() = *LeafLoc;
700 CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
701 FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
702 CalleeProfile.addHeadSamples(Count);
703 }
704}
705
706static SampleContextFrame
707getCallerContext(SampleContextFrames CalleeContext,
708 SampleContextFrameVector &CallerContext) {
709 assert(CalleeContext.size() > 1 && "Unexpected empty context")(static_cast <bool> (CalleeContext.size() > 1 &&
"Unexpected empty context") ? void (0) : __assert_fail ("CalleeContext.size() > 1 && \"Unexpected empty context\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 709, __extension__
__PRETTY_FUNCTION__))
;
710 CalleeContext = CalleeContext.drop_back();
711 CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
712 SampleContextFrame CallerFrame = CallerContext.back();
713 CallerContext.back().Location = LineLocation(0, 0);
714 return CallerFrame;
715}
716
717void CSProfileGenerator::populateInferredFunctionSamples() {
718 for (const auto &Item : ProfileMap) {
719 const auto &CalleeContext = Item.first;
720 const FunctionSamples &CalleeProfile = Item.second;
721
722 // If we already have head sample counts, we must have value profile
723 // for call sites added already. Skip to avoid double counting.
724 if (CalleeProfile.getHeadSamples())
725 continue;
726 // If we don't have context, nothing to do for caller's call site.
727 // This could happen for entry point function.
728 if (CalleeContext.isBaseContext())
729 continue;
730
731 // Infer Caller's frame loc and context ID through string splitting
732 SampleContextFrameVector CallerContextId;
733 SampleContextFrame &&CallerLeafFrameLoc =
734 getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
735 SampleContextFrames CallerContext(CallerContextId);
736
737 // It's possible that we haven't seen any sample directly in the caller,
738 // in which case CallerProfile will not exist. But we can't modify
739 // ProfileMap while iterating it.
740 // TODO: created function profile for those callers too
741 if (ProfileMap.find(CallerContext) == ProfileMap.end())
742 continue;
743 FunctionSamples &CallerProfile = ProfileMap[CallerContext];
744
745 // Since we don't have call count for inlined functions, we
746 // estimate it from inlinee's profile using entry body sample.
747 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
748 // If we don't have samples with location, use 1 to indicate live.
749 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
750 EstimatedCallCount = 1;
751 CallerProfile.addCalledTargetSamples(
752 CallerLeafFrameLoc.Location.LineOffset,
753 CallerLeafFrameLoc.Location.Discriminator,
754 CalleeProfile.getContext().getName(), EstimatedCallCount);
755 CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
756 CallerLeafFrameLoc.Location.Discriminator,
757 EstimatedCallCount);
758 CallerProfile.addTotalSamples(EstimatedCallCount);
759 }
760}
761
762void CSProfileGenerator::postProcessProfiles() {
763 // Compute hot/cold threshold based on profile. This will be used for cold
764 // context profile merging/trimming.
765 computeSummaryAndThreshold();
766
767 // Run global pre-inliner to adjust/merge context profile based on estimated
768 // inline decisions.
769 if (EnableCSPreInliner) {
770 CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
771 .run();
772 // Turn off the profile merger by default unless it is explicitly enabled.
773 if (!CSProfMergeColdContext.getNumOccurrences())
774 CSProfMergeColdContext = false;
775 }
776
777 // Trim and merge cold context profile using cold threshold above.
778 if (TrimColdProfile || CSProfMergeColdContext) {
779 SampleContextTrimmer(ProfileMap)
780 .trimAndMergeColdContextProfiles(
781 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
782 CSProfMaxColdContextDepth, EnableCSPreInliner);
783 }
784
785 // Merge function samples of CS profile to calculate profile density.
786 sampleprof::SampleProfileMap ContextLessProfiles;
787 for (const auto &I : ProfileMap) {
788 ContextLessProfiles[I.second.getName()].merge(I.second);
789 }
790
791 calculateAndShowDensity(ContextLessProfiles);
792 if (GenCSNestedProfile) {
793 CSProfileConverter CSConverter(ProfileMap);
794 CSConverter.convertProfiles();
795 FunctionSamples::ProfileIsCSFlat = false;
796 FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
797 }
798}
799
800void ProfileGeneratorBase::computeSummaryAndThreshold() {
801 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
802 auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
803 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
804 (Summary->getDetailedSummary()));
805 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
806 (Summary->getDetailedSummary()));
807}
808
809// Helper function to extract context prefix string stack
810// Extract context stack for reusing, leaf context stack will
811// be added compressed while looking up function profile
812static void extractPrefixContextStack(
813 SampleContextFrameVector &ContextStack,
814 const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
815 ProfiledBinary *Binary) {
816 for (const auto *P : Probes) {
817 Binary->getInlineContextForProbe(P, ContextStack, true);
818 }
819}
820
821void CSProfileGenerator::generateProbeBasedProfile() {
822 for (const auto &CI : SampleCounters) {
823 const ProbeBasedCtxKey *CtxKey =
7
'CtxKey' initialized to a null pointer value
824 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
6
Assuming the object is not a 'ProbeBasedCtxKey'
825 SampleContextFrameVector ContextStack;
826 extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
8
Forming reference to null pointer
827 // Fill in function body samples from probes, also infer caller's samples
828 // from callee's probe
829 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
830 // Fill in boundary samples for a call probe
831 populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
832 }
833}
834
835void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
836 ProbeCounterMap &ProbeCounter) {
837 RangeSample Ranges;
838 findDisjointRanges(Ranges, RangeCounter);
839 for (const auto &Range : Ranges) {
840 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
841 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
842 uint64_t Count = Range.second;
843 // Disjoint ranges have introduce zero-filled gap that
844 // doesn't belong to current context, filter them out.
845 if (Count == 0)
846 continue;
847
848 InstructionPointer IP(Binary, RangeBegin, true);
849 // Disjoint ranges may have range in the middle of two instr,
850 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
851 // can be Addr1+1 to Addr2-1. We should ignore such range.
852 if (IP.Address > RangeEnd)
853 continue;
854
855 do {
856 const AddressProbesMap &Address2ProbesMap =
857 Binary->getAddress2ProbesMap();
858 auto It = Address2ProbesMap.find(IP.Address);
859 if (It != Address2ProbesMap.end()) {
860 for (const auto &Probe : It->second) {
861 if (!Probe.isBlock())
862 continue;
863 ProbeCounter[&Probe] += Count;
864 }
865 }
866 } while (IP.advance() && IP.Address <= RangeEnd);
867 }
868}
869
870void CSProfileGenerator::populateBodySamplesWithProbes(
871 const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
872 ProbeCounterMap ProbeCounter;
873 // Extract the top frame probes by looking up each address among the range in
874 // the Address2ProbeMap
875 extractProbesFromRange(RangeCounter, ProbeCounter);
876 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
877 std::unordered_set<FunctionSamples *>>
878 FrameSamples;
879 for (const auto &PI : ProbeCounter) {
880 const MCDecodedPseudoProbe *Probe = PI.first;
881 uint64_t Count = PI.second;
882 FunctionSamples &FunctionProfile =
883 getFunctionProfileForLeafProbe(ContextStack, Probe);
884 // Record the current frame and FunctionProfile whenever samples are
885 // collected for non-danglie probes. This is for reporting all of the
886 // zero count probes of the frame later.
887 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
888 FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
889 FunctionProfile.addTotalSamples(Count);
890 if (Probe->isEntry()) {
891 FunctionProfile.addHeadSamples(Count);
892 // Look up for the caller's function profile
893 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
894 SampleContextFrames CalleeContextId =
895 FunctionProfile.getContext().getContextFrames();
896 if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
897 // Since the context id will be compressed, we have to use callee's
898 // context id to infer caller's context id to ensure they share the
899 // same context prefix.
900 SampleContextFrameVector CallerContextId;
901 SampleContextFrame &&CallerLeafFrameLoc =
902 getCallerContext(CalleeContextId, CallerContextId);
903 uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
904 assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 905, __extension__
__PRETTY_FUNCTION__))
905 "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!"
) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\""
, "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 905, __extension__
__PRETTY_FUNCTION__))
;
906 FunctionSamples &CallerProfile =
907 getFunctionProfileForContext(CallerContextId);
908 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
909 CallerProfile.addBodySamples(CallerIndex, 0, Count);
910 CallerProfile.addTotalSamples(Count);
911 CallerProfile.addCalledTargetSamples(
912 CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
913 }
914 }
915 }
916
917 // Assign zero count for remaining probes without sample hits to
918 // differentiate from probes optimized away, of which the counts are unknown
919 // and will be inferred by the compiler.
920 for (auto &I : FrameSamples) {
921 for (auto *FunctionProfile : I.second) {
922 for (auto *Probe : I.first->getProbes()) {
923 FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
924 }
925 }
926 }
927}
928
929void CSProfileGenerator::populateBoundarySamplesWithProbes(
930 const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
931 for (const auto &BI : BranchCounter) {
932 uint64_t SourceOffset = BI.first.first;
933 uint64_t TargetOffset = BI.first.second;
934 uint64_t Count = BI.second;
935 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
936 const MCDecodedPseudoProbe *CallProbe =
937 Binary->getCallProbeForAddr(SourceAddress);
938 if (CallProbe == nullptr)
939 continue;
940 FunctionSamples &FunctionProfile =
941 getFunctionProfileForLeafProbe(ContextStack, CallProbe);
942 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
943 FunctionProfile.addTotalSamples(Count);
944 StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
945 if (CalleeName.size() == 0)
946 continue;
947 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
948 Count);
949 }
950}
951
952FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
953 SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
954
955 // Explicitly copy the context for appending the leaf context
956 SampleContextFrameVector NewContextStack(ContextStack.begin(),
957 ContextStack.end());
958 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
959 // For leaf inlined context with the top frame, we should strip off the top
960 // frame's probe id, like:
961 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
962 auto LeafFrame = NewContextStack.back();
963 LeafFrame.Location = LineLocation(0, 0);
964 NewContextStack.pop_back();
965 // Compress the context string except for the leaf frame
966 CSProfileGenerator::compressRecursionContext(NewContextStack);
967 CSProfileGenerator::trimContext(NewContextStack);
968 NewContextStack.push_back(LeafFrame);
969
970 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
971 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
972 FunctionSamples &FunctionProile =
973 getFunctionProfileForContext(NewContextStack, WasLeafInlined);
974 FunctionProile.setFunctionHash(FuncDesc->FuncHash);
975 return FunctionProile;
976}
977
978} // end namespace sampleprof
979} // end namespace llvm