File: | build/source/llvm/tools/llvm-profgen/ProfileGenerator.cpp |
Warning: | line 1238, column 3 Forming reference to null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | #include "ProfileGenerator.h" | |||
9 | #include "ErrorHandling.h" | |||
10 | #include "MissingFrameInferrer.h" | |||
11 | #include "PerfReader.h" | |||
12 | #include "ProfiledBinary.h" | |||
13 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" | |||
14 | #include "llvm/ProfileData/ProfileCommon.h" | |||
15 | #include <algorithm> | |||
16 | #include <float.h> | |||
17 | #include <unordered_set> | |||
18 | #include <utility> | |||
19 | ||||
20 | cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), | |||
21 | cl::Required, | |||
22 | cl::desc("Output profile file")); | |||
23 | static cl::alias OutputA("o", cl::desc("Alias for --output"), | |||
24 | cl::aliasopt(OutputFilename)); | |||
25 | ||||
26 | static cl::opt<SampleProfileFormat> OutputFormat( | |||
27 | "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), | |||
28 | cl::values( | |||
29 | clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)" }, | |||
30 | clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary), "Extensible binary encoding" }, | |||
31 | clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding" }, | |||
32 | clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)" } | |||
33 | "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)" })); | |||
34 | ||||
35 | static cl::opt<bool> UseMD5( | |||
36 | "use-md5", cl::Hidden, | |||
37 | cl::desc("Use md5 to represent function names in the output profile (only " | |||
38 | "meaningful for -extbinary)")); | |||
39 | ||||
40 | static cl::opt<bool> PopulateProfileSymbolList( | |||
41 | "populate-profile-symbol-list", cl::init(false), cl::Hidden, | |||
42 | cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); | |||
43 | ||||
44 | static cl::opt<bool> FillZeroForAllFuncs( | |||
45 | "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, | |||
46 | cl::desc("Attribute all functions' range with zero count " | |||
47 | "even it's not hit by any samples.")); | |||
48 | ||||
49 | static cl::opt<int32_t, true> RecursionCompression( | |||
50 | "compress-recursion", | |||
51 | cl::desc("Compressing recursion by deduplicating adjacent frame " | |||
52 | "sequences up to the specified size. -1 means no size limit."), | |||
53 | cl::Hidden, | |||
54 | cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); | |||
55 | ||||
56 | static cl::opt<bool> | |||
57 | TrimColdProfile("trim-cold-profile", | |||
58 | cl::desc("If the total count of the profile is smaller " | |||
59 | "than threshold, it will be trimmed.")); | |||
60 | ||||
61 | static cl::opt<bool> CSProfMergeColdContext( | |||
62 | "csprof-merge-cold-context", cl::init(true), | |||
63 | cl::desc("If the total count of context profile is smaller than " | |||
64 | "the threshold, it will be merged into context-less base " | |||
65 | "profile.")); | |||
66 | ||||
67 | static cl::opt<uint32_t> CSProfMaxColdContextDepth( | |||
68 | "csprof-max-cold-context-depth", cl::init(1), | |||
69 | cl::desc("Keep the last K contexts while merging cold profile. 1 means the " | |||
70 | "context-less base profile")); | |||
71 | ||||
72 | static cl::opt<int, true> CSProfMaxContextDepth( | |||
73 | "csprof-max-context-depth", | |||
74 | cl::desc("Keep the last K contexts while merging profile. -1 means no " | |||
75 | "depth limit."), | |||
76 | cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); | |||
77 | ||||
78 | static cl::opt<double> HotFunctionDensityThreshold( | |||
79 | "hot-function-density-threshold", llvm::cl::init(1000), | |||
80 | llvm::cl::desc( | |||
81 | "specify density threshold for hot functions (default: 1000)"), | |||
82 | llvm::cl::Optional); | |||
83 | static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), | |||
84 | llvm::cl::desc("show profile density details"), | |||
85 | llvm::cl::Optional); | |||
86 | ||||
87 | static cl::opt<bool> UpdateTotalSamples( | |||
88 | "update-total-samples", llvm::cl::init(false), | |||
89 | llvm::cl::desc( | |||
90 | "Update total samples by accumulating all its body samples."), | |||
91 | llvm::cl::Optional); | |||
92 | ||||
93 | static cl::opt<bool> GenCSNestedProfile( | |||
94 | "gen-cs-nested-profile", cl::Hidden, cl::init(true), | |||
95 | cl::desc("Generate nested function profiles for CSSPGO")); | |||
96 | ||||
97 | cl::opt<bool> InferMissingFrames( | |||
98 | "infer-missing-frames", llvm::cl::init(true), | |||
99 | llvm::cl::desc( | |||
100 | "Infer missing call frames due to compiler tail call elimination."), | |||
101 | llvm::cl::Optional); | |||
102 | ||||
103 | using namespace llvm; | |||
104 | using namespace sampleprof; | |||
105 | ||||
106 | namespace llvm { | |||
107 | extern cl::opt<int> ProfileSummaryCutoffHot; | |||
108 | extern cl::opt<bool> UseContextLessSummary; | |||
109 | ||||
110 | namespace sampleprof { | |||
111 | ||||
112 | // Initialize the MaxCompressionSize to -1 which means no size limit | |||
113 | int32_t CSProfileGenerator::MaxCompressionSize = -1; | |||
114 | ||||
115 | int CSProfileGenerator::MaxContextDepth = -1; | |||
116 | ||||
117 | bool ProfileGeneratorBase::UseFSDiscriminator = false; | |||
118 | ||||
119 | std::unique_ptr<ProfileGeneratorBase> | |||
120 | ProfileGeneratorBase::create(ProfiledBinary *Binary, | |||
121 | const ContextSampleCounterMap *SampleCounters, | |||
122 | bool ProfileIsCS) { | |||
123 | std::unique_ptr<ProfileGeneratorBase> Generator; | |||
124 | if (ProfileIsCS) { | |||
125 | if (Binary->useFSDiscriminator()) | |||
126 | exitWithError("FS discriminator is not supported in CS profile."); | |||
127 | Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); | |||
128 | } else { | |||
129 | Generator.reset(new ProfileGenerator(Binary, SampleCounters)); | |||
130 | } | |||
131 | ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); | |||
132 | FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); | |||
133 | ||||
134 | return Generator; | |||
135 | } | |||
136 | ||||
137 | std::unique_ptr<ProfileGeneratorBase> | |||
138 | ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, | |||
139 | bool ProfileIsCS) { | |||
140 | std::unique_ptr<ProfileGeneratorBase> Generator; | |||
141 | if (ProfileIsCS) { | |||
142 | if (Binary->useFSDiscriminator()) | |||
143 | exitWithError("FS discriminator is not supported in CS profile."); | |||
144 | Generator.reset(new CSProfileGenerator(Binary, Profiles)); | |||
145 | } else { | |||
146 | Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); | |||
147 | } | |||
148 | ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); | |||
149 | FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); | |||
150 | ||||
151 | return Generator; | |||
152 | } | |||
153 | ||||
154 | void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, | |||
155 | SampleProfileMap &ProfileMap) { | |||
156 | // Populate profile symbol list if extended binary format is used. | |||
157 | ProfileSymbolList SymbolList; | |||
158 | ||||
159 | if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { | |||
160 | Binary->populateSymbolListFromDWARF(SymbolList); | |||
161 | Writer->setProfileSymbolList(&SymbolList); | |||
162 | } | |||
163 | ||||
164 | if (std::error_code EC = Writer->write(ProfileMap)) | |||
165 | exitWithError(std::move(EC)); | |||
166 | } | |||
167 | ||||
168 | void ProfileGeneratorBase::write() { | |||
169 | auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); | |||
170 | if (std::error_code EC = WriterOrErr.getError()) | |||
171 | exitWithError(EC, OutputFilename); | |||
172 | ||||
173 | if (UseMD5) { | |||
174 | if (OutputFormat != SPF_Ext_Binary) | |||
175 | WithColor::warning() << "-use-md5 is ignored. Specify " | |||
176 | "--format=extbinary to enable it\n"; | |||
177 | else | |||
178 | WriterOrErr.get()->setUseMD5(); | |||
179 | } | |||
180 | ||||
181 | write(std::move(WriterOrErr.get()), ProfileMap); | |||
182 | } | |||
183 | ||||
184 | void ProfileGeneratorBase::showDensitySuggestion(double Density) { | |||
185 | if (Density == 0.0) | |||
186 | WithColor::warning() << "The --profile-summary-cutoff-hot option may be " | |||
187 | "set too low. Please check your command.\n"; | |||
188 | else if (Density < HotFunctionDensityThreshold) | |||
189 | WithColor::warning() | |||
190 | << "AutoFDO is estimated to optimize better with " | |||
191 | << format("%.1f", HotFunctionDensityThreshold / Density) | |||
192 | << "x more samples. Please consider increasing sampling rate or " | |||
193 | "profiling for longer duration to get more samples.\n"; | |||
194 | ||||
195 | if (ShowDensity) | |||
196 | outs() << "Minimum profile density for hot functions with top " | |||
197 | << format("%.2f", | |||
198 | static_cast<double>(ProfileSummaryCutoffHot.getValue()) / | |||
199 | 10000) | |||
200 | << "% total samples: " << format("%.1f", Density) << "\n"; | |||
201 | } | |||
202 | ||||
203 | double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, | |||
204 | uint64_t HotCntThreshold) { | |||
205 | double Density = DBL_MAX1.7976931348623157e+308; | |||
206 | std::vector<const FunctionSamples *> HotFuncs; | |||
207 | for (auto &I : Profiles) { | |||
208 | auto &FuncSamples = I.second; | |||
209 | if (FuncSamples.getTotalSamples() < HotCntThreshold) | |||
210 | continue; | |||
211 | HotFuncs.emplace_back(&FuncSamples); | |||
212 | } | |||
213 | ||||
214 | for (auto *FuncSamples : HotFuncs) { | |||
215 | auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); | |||
216 | if (!Func) | |||
217 | continue; | |||
218 | uint64_t FuncSize = Func->getFuncSize(); | |||
219 | if (FuncSize == 0) | |||
220 | continue; | |||
221 | Density = | |||
222 | std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / | |||
223 | FuncSize); | |||
224 | } | |||
225 | ||||
226 | return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density; | |||
227 | } | |||
228 | ||||
229 | void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, | |||
230 | const RangeSample &Ranges) { | |||
231 | ||||
232 | /* | |||
233 | Regions may overlap with each other. Using the boundary info, find all | |||
234 | disjoint ranges and their sample count. BoundaryPoint contains the count | |||
235 | multiple samples begin/end at this points. | |||
236 | ||||
237 | |<--100-->| Sample1 | |||
238 | |<------200------>| Sample2 | |||
239 | A B C | |||
240 | ||||
241 | In the example above, | |||
242 | Sample1 begins at A, ends at B, its value is 100. | |||
243 | Sample2 beings at A, ends at C, its value is 200. | |||
244 | For A, BeginCount is the sum of sample begins at A, which is 300 and no | |||
245 | samples ends at A, so EndCount is 0. | |||
246 | Then boundary points A, B, and C with begin/end counts are: | |||
247 | A: (300, 0) | |||
248 | B: (0, 100) | |||
249 | C: (0, 200) | |||
250 | */ | |||
251 | struct BoundaryPoint { | |||
252 | // Sum of sample counts beginning at this point | |||
253 | uint64_t BeginCount = UINT64_MAX(18446744073709551615UL); | |||
254 | // Sum of sample counts ending at this point | |||
255 | uint64_t EndCount = UINT64_MAX(18446744073709551615UL); | |||
256 | // Is the begin point of a zero range. | |||
257 | bool IsZeroRangeBegin = false; | |||
258 | // Is the end point of a zero range. | |||
259 | bool IsZeroRangeEnd = false; | |||
260 | ||||
261 | void addBeginCount(uint64_t Count) { | |||
262 | if (BeginCount == UINT64_MAX(18446744073709551615UL)) | |||
263 | BeginCount = 0; | |||
264 | BeginCount += Count; | |||
265 | } | |||
266 | ||||
267 | void addEndCount(uint64_t Count) { | |||
268 | if (EndCount == UINT64_MAX(18446744073709551615UL)) | |||
269 | EndCount = 0; | |||
270 | EndCount += Count; | |||
271 | } | |||
272 | }; | |||
273 | ||||
274 | /* | |||
275 | For the above example. With boundary points, follwing logic finds two | |||
276 | disjoint region of | |||
277 | ||||
278 | [A,B]: 300 | |||
279 | [B+1,C]: 200 | |||
280 | ||||
281 | If there is a boundary point that both begin and end, the point itself | |||
282 | becomes a separate disjoint region. For example, if we have original | |||
283 | ranges of | |||
284 | ||||
285 | |<--- 100 --->| | |||
286 | |<--- 200 --->| | |||
287 | A B C | |||
288 | ||||
289 | there are three boundary points with their begin/end counts of | |||
290 | ||||
291 | A: (100, 0) | |||
292 | B: (200, 100) | |||
293 | C: (0, 200) | |||
294 | ||||
295 | the disjoint ranges would be | |||
296 | ||||
297 | [A, B-1]: 100 | |||
298 | [B, B]: 300 | |||
299 | [B+1, C]: 200. | |||
300 | ||||
301 | Example for zero value range: | |||
302 | ||||
303 | |<--- 100 --->| | |||
304 | |<--- 200 --->| | |||
305 | |<--------------- 0 ----------------->| | |||
306 | A B C D E F | |||
307 | ||||
308 | [A, B-1] : 0 | |||
309 | [B, C] : 100 | |||
310 | [C+1, D-1]: 0 | |||
311 | [D, E] : 200 | |||
312 | [E+1, F] : 0 | |||
313 | */ | |||
314 | std::map<uint64_t, BoundaryPoint> Boundaries; | |||
315 | ||||
316 | for (const auto &Item : Ranges) { | |||
317 | assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first. second && "Invalid instruction range") ? void (0) : __assert_fail ("Item.first.first <= Item.first.second && \"Invalid instruction range\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 318, __extension__ __PRETTY_FUNCTION__)) | |||
318 | "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first. second && "Invalid instruction range") ? void (0) : __assert_fail ("Item.first.first <= Item.first.second && \"Invalid instruction range\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 318, __extension__ __PRETTY_FUNCTION__)); | |||
319 | auto &BeginPoint = Boundaries[Item.first.first]; | |||
320 | auto &EndPoint = Boundaries[Item.first.second]; | |||
321 | uint64_t Count = Item.second; | |||
322 | ||||
323 | BeginPoint.addBeginCount(Count); | |||
324 | EndPoint.addEndCount(Count); | |||
325 | if (Count == 0) { | |||
326 | BeginPoint.IsZeroRangeBegin = true; | |||
327 | EndPoint.IsZeroRangeEnd = true; | |||
328 | } | |||
329 | } | |||
330 | ||||
331 | // Use UINT64_MAX to indicate there is no existing range between BeginAddress | |||
332 | // and the next valid address | |||
333 | uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL); | |||
334 | int ZeroRangeDepth = 0; | |||
335 | uint64_t Count = 0; | |||
336 | for (const auto &Item : Boundaries) { | |||
337 | uint64_t Address = Item.first; | |||
338 | const BoundaryPoint &Point = Item.second; | |||
339 | if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) { | |||
340 | if (BeginAddress != UINT64_MAX(18446744073709551615UL)) | |||
341 | DisjointRanges[{BeginAddress, Address - 1}] = Count; | |||
342 | Count += Point.BeginCount; | |||
343 | BeginAddress = Address; | |||
344 | ZeroRangeDepth += Point.IsZeroRangeBegin; | |||
345 | } | |||
346 | if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) { | |||
347 | assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL )) && "First boundary point cannot be 'end' point") ? void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 348, __extension__ __PRETTY_FUNCTION__)) | |||
348 | "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL )) && "First boundary point cannot be 'end' point") ? void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 348, __extension__ __PRETTY_FUNCTION__)); | |||
349 | DisjointRanges[{BeginAddress, Address}] = Count; | |||
350 | assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount && "Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 350, __extension__ __PRETTY_FUNCTION__)); | |||
351 | Count -= Point.EndCount; | |||
352 | BeginAddress = Address + 1; | |||
353 | ZeroRangeDepth -= Point.IsZeroRangeEnd; | |||
354 | // If the remaining count is zero and it's no longer in a zero range, this | |||
355 | // means we consume all the ranges before, thus mark BeginAddress as | |||
356 | // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: | |||
357 | // [<---- 10 ---->] | |||
358 | // [<---- 20 ---->] | |||
359 | // A B C D | |||
360 | // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't | |||
361 | // have the [B+1, C-1] zero range. | |||
362 | if (Count == 0 && ZeroRangeDepth == 0) | |||
363 | BeginAddress = UINT64_MAX(18446744073709551615UL); | |||
364 | } | |||
365 | } | |||
366 | } | |||
367 | ||||
368 | void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( | |||
369 | FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, | |||
370 | uint64_t Count) { | |||
371 | // Use the maximum count of samples with same line location | |||
372 | uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); | |||
373 | ||||
374 | // Use duplication factor to compensated for loop unroll/vectorization. | |||
375 | // Note that this is only needed when we're taking MAX of the counts at | |||
376 | // the location instead of SUM. | |||
377 | Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); | |||
378 | ||||
379 | ErrorOr<uint64_t> R = | |||
380 | FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); | |||
381 | ||||
382 | uint64_t PreviousCount = R ? R.get() : 0; | |||
383 | if (PreviousCount <= Count) { | |||
384 | FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, | |||
385 | Count - PreviousCount); | |||
386 | } | |||
387 | } | |||
388 | ||||
389 | void ProfileGeneratorBase::updateTotalSamples() { | |||
390 | for (auto &Item : ProfileMap) { | |||
391 | FunctionSamples &FunctionProfile = Item.second; | |||
392 | FunctionProfile.updateTotalSamples(); | |||
393 | } | |||
394 | } | |||
395 | ||||
396 | void ProfileGeneratorBase::updateCallsiteSamples() { | |||
397 | for (auto &Item : ProfileMap) { | |||
398 | FunctionSamples &FunctionProfile = Item.second; | |||
399 | FunctionProfile.updateCallsiteSamples(); | |||
400 | } | |||
401 | } | |||
402 | ||||
403 | void ProfileGeneratorBase::updateFunctionSamples() { | |||
404 | updateCallsiteSamples(); | |||
405 | ||||
406 | if (UpdateTotalSamples) | |||
407 | updateTotalSamples(); | |||
408 | } | |||
409 | ||||
410 | void ProfileGeneratorBase::collectProfiledFunctions() { | |||
411 | std::unordered_set<const BinaryFunction *> ProfiledFunctions; | |||
412 | if (collectFunctionsFromRawProfile(ProfiledFunctions)) | |||
413 | Binary->setProfiledFunctions(ProfiledFunctions); | |||
414 | else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) | |||
415 | Binary->setProfiledFunctions(ProfiledFunctions); | |||
416 | else | |||
417 | llvm_unreachable("Unsupported input profile")::llvm::llvm_unreachable_internal("Unsupported input profile" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 417); | |||
418 | } | |||
419 | ||||
420 | bool ProfileGeneratorBase::collectFunctionsFromRawProfile( | |||
421 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { | |||
422 | if (!SampleCounters) | |||
423 | return false; | |||
424 | // Go through all the stacks, ranges and branches in sample counters, use | |||
425 | // the start of the range to look up the function it belongs and record the | |||
426 | // function. | |||
427 | for (const auto &CI : *SampleCounters) { | |||
428 | if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { | |||
429 | for (auto StackAddr : CtxKey->Context) { | |||
430 | if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) | |||
431 | ProfiledFunctions.insert(FRange->Func); | |||
432 | } | |||
433 | } | |||
434 | ||||
435 | for (auto Item : CI.second.RangeCounter) { | |||
436 | uint64_t StartAddress = Item.first.first; | |||
437 | if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) | |||
438 | ProfiledFunctions.insert(FRange->Func); | |||
439 | } | |||
440 | ||||
441 | for (auto Item : CI.second.BranchCounter) { | |||
442 | uint64_t SourceAddress = Item.first.first; | |||
443 | uint64_t TargetAddress = Item.first.second; | |||
444 | if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) | |||
445 | ProfiledFunctions.insert(FRange->Func); | |||
446 | if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) | |||
447 | ProfiledFunctions.insert(FRange->Func); | |||
448 | } | |||
449 | } | |||
450 | return true; | |||
451 | } | |||
452 | ||||
453 | bool ProfileGenerator::collectFunctionsFromLLVMProfile( | |||
454 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { | |||
455 | for (const auto &FS : ProfileMap) { | |||
456 | if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) | |||
457 | ProfiledFunctions.insert(Func); | |||
458 | } | |||
459 | return true; | |||
460 | } | |||
461 | ||||
462 | bool CSProfileGenerator::collectFunctionsFromLLVMProfile( | |||
463 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { | |||
464 | for (auto *Node : ContextTracker) { | |||
465 | if (!Node->getFuncName().empty()) | |||
466 | if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) | |||
467 | ProfiledFunctions.insert(Func); | |||
468 | } | |||
469 | return true; | |||
470 | } | |||
471 | ||||
472 | FunctionSamples & | |||
473 | ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { | |||
474 | SampleContext Context(FuncName); | |||
475 | auto Ret = ProfileMap.emplace(Context, FunctionSamples()); | |||
476 | if (Ret.second) { | |||
477 | FunctionSamples &FProfile = Ret.first->second; | |||
478 | FProfile.setContext(Context); | |||
479 | } | |||
480 | return Ret.first->second; | |||
481 | } | |||
482 | ||||
483 | void ProfileGenerator::generateProfile() { | |||
484 | collectProfiledFunctions(); | |||
485 | ||||
486 | if (Binary->usePseudoProbes()) | |||
487 | Binary->decodePseudoProbe(); | |||
488 | ||||
489 | if (SampleCounters) { | |||
490 | if (Binary->usePseudoProbes()) { | |||
491 | generateProbeBasedProfile(); | |||
492 | } else { | |||
493 | generateLineNumBasedProfile(); | |||
494 | } | |||
495 | } | |||
496 | ||||
497 | postProcessProfiles(); | |||
498 | } | |||
499 | ||||
500 | void ProfileGenerator::postProcessProfiles() { | |||
501 | computeSummaryAndThreshold(ProfileMap); | |||
502 | trimColdProfiles(ProfileMap, ColdCountThreshold); | |||
503 | calculateAndShowDensity(ProfileMap); | |||
504 | } | |||
505 | ||||
506 | void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, | |||
507 | uint64_t ColdCntThreshold) { | |||
508 | if (!TrimColdProfile) | |||
509 | return; | |||
510 | ||||
511 | // Move cold profiles into a tmp container. | |||
512 | std::vector<SampleContext> ColdProfiles; | |||
513 | for (const auto &I : ProfileMap) { | |||
514 | if (I.second.getTotalSamples() < ColdCntThreshold) | |||
515 | ColdProfiles.emplace_back(I.first); | |||
516 | } | |||
517 | ||||
518 | // Remove the cold profile from ProfileMap. | |||
519 | for (const auto &I : ColdProfiles) | |||
520 | ProfileMap.erase(I); | |||
521 | } | |||
522 | ||||
523 | void ProfileGenerator::generateLineNumBasedProfile() { | |||
524 | assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 525, __extension__ __PRETTY_FUNCTION__)) | |||
525 | "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 525, __extension__ __PRETTY_FUNCTION__)); | |||
526 | const SampleCounter &SC = SampleCounters->begin()->second; | |||
527 | // Fill in function body samples | |||
528 | populateBodySamplesForAllFunctions(SC.RangeCounter); | |||
529 | // Fill in boundary sample counts as well as call site samples for calls | |||
530 | populateBoundarySamplesForAllFunctions(SC.BranchCounter); | |||
531 | ||||
532 | updateFunctionSamples(); | |||
533 | } | |||
534 | ||||
535 | void ProfileGenerator::generateProbeBasedProfile() { | |||
536 | assert(SampleCounters->size() == 1 &&(static_cast <bool> (SampleCounters->size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 537, __extension__ __PRETTY_FUNCTION__)) | |||
537 | "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters->size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters->size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 537, __extension__ __PRETTY_FUNCTION__)); | |||
538 | // Enable pseudo probe functionalities in SampleProf | |||
539 | FunctionSamples::ProfileIsProbeBased = true; | |||
540 | const SampleCounter &SC = SampleCounters->begin()->second; | |||
541 | // Fill in function body samples | |||
542 | populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); | |||
543 | // Fill in boundary sample counts as well as call site samples for calls | |||
544 | populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); | |||
545 | ||||
546 | updateFunctionSamples(); | |||
547 | } | |||
548 | ||||
549 | void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( | |||
550 | const RangeSample &RangeCounter) { | |||
551 | ProbeCounterMap ProbeCounter; | |||
552 | // preprocessRangeCounter returns disjoint ranges, so no longer to redo it | |||
553 | // inside extractProbesFromRange. | |||
554 | extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, | |||
555 | false); | |||
556 | ||||
557 | for (const auto &PI : ProbeCounter) { | |||
558 | const MCDecodedPseudoProbe *Probe = PI.first; | |||
559 | uint64_t Count = PI.second; | |||
560 | SampleContextFrameVector FrameVec; | |||
561 | Binary->getInlineContextForProbe(Probe, FrameVec, true); | |||
562 | FunctionSamples &FunctionProfile = | |||
563 | getLeafProfileAndAddTotalSamples(FrameVec, Count); | |||
564 | FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); | |||
565 | if (Probe->isEntry()) | |||
566 | FunctionProfile.addHeadSamples(Count); | |||
567 | } | |||
568 | } | |||
569 | ||||
570 | void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( | |||
571 | const BranchSample &BranchCounters) { | |||
572 | for (const auto &Entry : BranchCounters) { | |||
573 | uint64_t SourceAddress = Entry.first.first; | |||
574 | uint64_t TargetAddress = Entry.first.second; | |||
575 | uint64_t Count = Entry.second; | |||
576 | assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch" ) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 576, __extension__ __PRETTY_FUNCTION__)); | |||
577 | ||||
578 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); | |||
579 | if (CalleeName.size() == 0) | |||
580 | continue; | |||
581 | ||||
582 | const MCDecodedPseudoProbe *CallProbe = | |||
583 | Binary->getCallProbeForAddr(SourceAddress); | |||
584 | if (CallProbe == nullptr) | |||
585 | continue; | |||
586 | ||||
587 | // Record called target sample and its count. | |||
588 | SampleContextFrameVector FrameVec; | |||
589 | Binary->getInlineContextForProbe(CallProbe, FrameVec, true); | |||
590 | ||||
591 | if (!FrameVec.empty()) { | |||
592 | FunctionSamples &FunctionProfile = | |||
593 | getLeafProfileAndAddTotalSamples(FrameVec, 0); | |||
594 | FunctionProfile.addCalledTargetSamples( | |||
595 | FrameVec.back().Location.LineOffset, 0, CalleeName, Count); | |||
596 | } | |||
597 | } | |||
598 | } | |||
599 | ||||
600 | FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( | |||
601 | const SampleContextFrameVector &FrameVec, uint64_t Count) { | |||
602 | // Get top level profile | |||
603 | FunctionSamples *FunctionProfile = | |||
604 | &getTopLevelFunctionProfile(FrameVec[0].FuncName); | |||
605 | FunctionProfile->addTotalSamples(Count); | |||
606 | if (Binary->usePseudoProbes()) { | |||
607 | const auto *FuncDesc = Binary->getFuncDescForGUID( | |||
608 | Function::getGUID(FunctionProfile->getName())); | |||
609 | FunctionProfile->setFunctionHash(FuncDesc->FuncHash); | |||
610 | } | |||
611 | ||||
612 | for (size_t I = 1; I < FrameVec.size(); I++) { | |||
613 | LineLocation Callsite( | |||
614 | FrameVec[I - 1].Location.LineOffset, | |||
615 | getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); | |||
616 | FunctionSamplesMap &SamplesMap = | |||
617 | FunctionProfile->functionSamplesAt(Callsite); | |||
618 | auto Ret = | |||
619 | SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); | |||
620 | if (Ret.second) { | |||
621 | SampleContext Context(FrameVec[I].FuncName); | |||
622 | Ret.first->second.setContext(Context); | |||
623 | } | |||
624 | FunctionProfile = &Ret.first->second; | |||
625 | FunctionProfile->addTotalSamples(Count); | |||
626 | if (Binary->usePseudoProbes()) { | |||
627 | const auto *FuncDesc = Binary->getFuncDescForGUID( | |||
628 | Function::getGUID(FunctionProfile->getName())); | |||
629 | FunctionProfile->setFunctionHash(FuncDesc->FuncHash); | |||
630 | } | |||
631 | } | |||
632 | ||||
633 | return *FunctionProfile; | |||
634 | } | |||
635 | ||||
636 | RangeSample | |||
637 | ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { | |||
638 | RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); | |||
639 | if (FillZeroForAllFuncs) { | |||
640 | for (auto &FuncI : Binary->getAllBinaryFunctions()) { | |||
641 | for (auto &R : FuncI.second.Ranges) { | |||
642 | Ranges[{R.first, R.second - 1}] += 0; | |||
643 | } | |||
644 | } | |||
645 | } else { | |||
646 | // For each range, we search for all ranges of the function it belongs to | |||
647 | // and initialize it with zero count, so it remains zero if doesn't hit any | |||
648 | // samples. This is to be consistent with compiler that interpret zero count | |||
649 | // as unexecuted(cold). | |||
650 | for (const auto &I : RangeCounter) { | |||
651 | uint64_t StartAddress = I.first.first; | |||
652 | for (const auto &Range : Binary->getRanges(StartAddress)) | |||
653 | Ranges[{Range.first, Range.second - 1}] += 0; | |||
654 | } | |||
655 | } | |||
656 | RangeSample DisjointRanges; | |||
657 | findDisjointRanges(DisjointRanges, Ranges); | |||
658 | return DisjointRanges; | |||
659 | } | |||
660 | ||||
661 | void ProfileGenerator::populateBodySamplesForAllFunctions( | |||
662 | const RangeSample &RangeCounter) { | |||
663 | for (const auto &Range : preprocessRangeCounter(RangeCounter)) { | |||
664 | uint64_t RangeBegin = Range.first.first; | |||
665 | uint64_t RangeEnd = Range.first.second; | |||
666 | uint64_t Count = Range.second; | |||
667 | ||||
668 | InstructionPointer IP(Binary, RangeBegin, true); | |||
669 | // Disjoint ranges may have range in the middle of two instr, | |||
670 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
671 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
672 | if (IP.Address > RangeEnd) | |||
673 | continue; | |||
674 | ||||
675 | do { | |||
676 | const SampleContextFrameVector FrameVec = | |||
677 | Binary->getFrameLocationStack(IP.Address); | |||
678 | if (!FrameVec.empty()) { | |||
679 | // FIXME: As accumulating total count per instruction caused some | |||
680 | // regression, we changed to accumulate total count per byte as a | |||
681 | // workaround. Tuning hotness threshold on the compiler side might be | |||
682 | // necessary in the future. | |||
683 | FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( | |||
684 | FrameVec, Count * Binary->getInstSize(IP.Address)); | |||
685 | updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), | |||
686 | Count); | |||
687 | } | |||
688 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
689 | } | |||
690 | } | |||
691 | ||||
692 | StringRef | |||
693 | ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { | |||
694 | // Get the function range by branch target if it's a call branch. | |||
695 | auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress); | |||
696 | ||||
697 | // We won't accumulate sample count for a range whose start is not the real | |||
698 | // function entry such as outlined function or inner labels. | |||
699 | if (!FRange || !FRange->IsFuncEntry) | |||
700 | return StringRef(); | |||
701 | ||||
702 | return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); | |||
703 | } | |||
704 | ||||
705 | void ProfileGenerator::populateBoundarySamplesForAllFunctions( | |||
706 | const BranchSample &BranchCounters) { | |||
707 | for (const auto &Entry : BranchCounters) { | |||
708 | uint64_t SourceAddress = Entry.first.first; | |||
709 | uint64_t TargetAddress = Entry.first.second; | |||
710 | uint64_t Count = Entry.second; | |||
711 | assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch" ) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 711, __extension__ __PRETTY_FUNCTION__)); | |||
712 | ||||
713 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); | |||
714 | if (CalleeName.size() == 0) | |||
715 | continue; | |||
716 | // Record called target sample and its count. | |||
717 | const SampleContextFrameVector &FrameVec = | |||
718 | Binary->getCachedFrameLocationStack(SourceAddress); | |||
719 | if (!FrameVec.empty()) { | |||
720 | FunctionSamples &FunctionProfile = | |||
721 | getLeafProfileAndAddTotalSamples(FrameVec, 0); | |||
722 | FunctionProfile.addCalledTargetSamples( | |||
723 | FrameVec.back().Location.LineOffset, | |||
724 | getBaseDiscriminator(FrameVec.back().Location.Discriminator), | |||
725 | CalleeName, Count); | |||
726 | } | |||
727 | // Add head samples for callee. | |||
728 | FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); | |||
729 | CalleeProfile.addHeadSamples(Count); | |||
730 | } | |||
731 | } | |||
732 | ||||
733 | void ProfileGeneratorBase::calculateAndShowDensity( | |||
734 | const SampleProfileMap &Profiles) { | |||
735 | double Density = calculateDensity(Profiles, HotCountThreshold); | |||
736 | showDensitySuggestion(Density); | |||
737 | } | |||
738 | ||||
739 | FunctionSamples * | |||
740 | CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, | |||
741 | bool WasLeafInlined) { | |||
742 | FunctionSamples *FProfile = ContextNode->getFunctionSamples(); | |||
743 | if (!FProfile) { | |||
744 | FSamplesList.emplace_back(); | |||
745 | FProfile = &FSamplesList.back(); | |||
746 | FProfile->setName(ContextNode->getFuncName()); | |||
747 | ContextNode->setFunctionSamples(FProfile); | |||
748 | } | |||
749 | // Update ContextWasInlined attribute for existing contexts. | |||
750 | // The current function can be called in two ways: | |||
751 | // - when processing a probe of the current frame | |||
752 | // - when processing the entry probe of an inlinee's frame, which | |||
753 | // is then used to update the callsite count of the current frame. | |||
754 | // The two can happen in any order, hence here we are making sure | |||
755 | // `ContextWasInlined` is always set as expected. | |||
756 | // TODO: Note that the former does not always happen if no probes of the | |||
757 | // current frame has samples, and if the latter happens, we could lose the | |||
758 | // attribute. This should be fixed. | |||
759 | if (WasLeafInlined) | |||
760 | FProfile->getContext().setAttribute(ContextWasInlined); | |||
761 | return FProfile; | |||
762 | } | |||
763 | ||||
764 | ContextTrieNode * | |||
765 | CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, | |||
766 | bool WasLeafInlined) { | |||
767 | ContextTrieNode *ContextNode = | |||
768 | ContextTracker.getOrCreateContextPath(Context, true); | |||
769 | getOrCreateFunctionSamples(ContextNode, WasLeafInlined); | |||
770 | return ContextNode; | |||
771 | } | |||
772 | ||||
773 | void CSProfileGenerator::generateProfile() { | |||
774 | FunctionSamples::ProfileIsCS = true; | |||
775 | ||||
776 | collectProfiledFunctions(); | |||
777 | ||||
778 | if (Binary->usePseudoProbes()) { | |||
| ||||
779 | Binary->decodePseudoProbe(); | |||
780 | if (InferMissingFrames) | |||
781 | initializeMissingFrameInferrer(); | |||
782 | } | |||
783 | ||||
784 | if (SampleCounters
| |||
785 | if (Binary->usePseudoProbes()) { | |||
786 | generateProbeBasedProfile(); | |||
787 | } else { | |||
788 | generateLineNumBasedProfile(); | |||
789 | } | |||
790 | } | |||
791 | ||||
792 | if (Binary->getTrackFuncContextSize()) | |||
793 | computeSizeForProfiledFunctions(); | |||
794 | ||||
795 | postProcessProfiles(); | |||
796 | } | |||
797 | ||||
798 | void CSProfileGenerator::initializeMissingFrameInferrer() { | |||
799 | Binary->getMissingContextInferrer()->initialize(SampleCounters); | |||
800 | } | |||
801 | ||||
802 | void CSProfileGenerator::inferMissingFrames( | |||
803 | const SmallVectorImpl<uint64_t> &Context, | |||
804 | SmallVectorImpl<uint64_t> &NewContext) { | |||
805 | Binary->inferMissingFrames(Context, NewContext); | |||
806 | } | |||
807 | ||||
808 | void CSProfileGenerator::computeSizeForProfiledFunctions() { | |||
809 | for (auto *Func : Binary->getProfiledFunctions()) | |||
810 | Binary->computeInlinedContextSizeForFunc(Func); | |||
811 | ||||
812 | // Flush the symbolizer to save memory. | |||
813 | Binary->flushSymbolizer(); | |||
814 | } | |||
815 | ||||
816 | void CSProfileGenerator::updateFunctionSamples() { | |||
817 | for (auto *Node : ContextTracker) { | |||
818 | FunctionSamples *FSamples = Node->getFunctionSamples(); | |||
819 | if (FSamples) { | |||
820 | if (UpdateTotalSamples) | |||
821 | FSamples->updateTotalSamples(); | |||
822 | FSamples->updateCallsiteSamples(); | |||
823 | } | |||
824 | } | |||
825 | } | |||
826 | ||||
827 | void CSProfileGenerator::generateLineNumBasedProfile() { | |||
828 | for (const auto &CI : *SampleCounters) { | |||
829 | const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); | |||
830 | ||||
831 | ContextTrieNode *ContextNode = &getRootContext(); | |||
832 | // Sample context will be empty if the jump is an external-to-internal call | |||
833 | // pattern, the head samples should be added for the internal function. | |||
834 | if (!CtxKey->Context.empty()) { | |||
835 | // Get or create function profile for the range | |||
836 | ContextNode = | |||
837 | getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined); | |||
838 | // Fill in function body samples | |||
839 | populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), | |||
840 | CI.second.RangeCounter); | |||
841 | } | |||
842 | // Fill in boundary sample counts as well as call site samples for calls | |||
843 | populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); | |||
844 | } | |||
845 | // Fill in call site value sample for inlined calls and also use context to | |||
846 | // infer missing samples. Since we don't have call count for inlined | |||
847 | // functions, we estimate it from inlinee's profile using the entry of the | |||
848 | // body sample. | |||
849 | populateInferredFunctionSamples(getRootContext()); | |||
850 | ||||
851 | updateFunctionSamples(); | |||
852 | } | |||
853 | ||||
854 | void CSProfileGenerator::populateBodySamplesForFunction( | |||
855 | FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { | |||
856 | // Compute disjoint ranges first, so we can use MAX | |||
857 | // for calculating count for each location. | |||
858 | RangeSample Ranges; | |||
859 | findDisjointRanges(Ranges, RangeCounter); | |||
860 | for (const auto &Range : Ranges) { | |||
861 | uint64_t RangeBegin = Range.first.first; | |||
862 | uint64_t RangeEnd = Range.first.second; | |||
863 | uint64_t Count = Range.second; | |||
864 | // Disjoint ranges have introduce zero-filled gap that | |||
865 | // doesn't belong to current context, filter them out. | |||
866 | if (Count == 0) | |||
867 | continue; | |||
868 | ||||
869 | InstructionPointer IP(Binary, RangeBegin, true); | |||
870 | // Disjoint ranges may have range in the middle of two instr, | |||
871 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
872 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
873 | if (IP.Address > RangeEnd) | |||
874 | continue; | |||
875 | ||||
876 | do { | |||
877 | auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address); | |||
878 | if (LeafLoc) { | |||
879 | // Recording body sample for this specific context | |||
880 | updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); | |||
881 | FunctionProfile.addTotalSamples(Count); | |||
882 | } | |||
883 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
884 | } | |||
885 | } | |||
886 | ||||
887 | void CSProfileGenerator::populateBoundarySamplesForFunction( | |||
888 | ContextTrieNode *Node, const BranchSample &BranchCounters) { | |||
889 | ||||
890 | for (const auto &Entry : BranchCounters) { | |||
891 | uint64_t SourceAddress = Entry.first.first; | |||
892 | uint64_t TargetAddress = Entry.first.second; | |||
893 | uint64_t Count = Entry.second; | |||
894 | assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch" ) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 894, __extension__ __PRETTY_FUNCTION__)); | |||
895 | ||||
896 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); | |||
897 | if (CalleeName.size() == 0) | |||
898 | continue; | |||
899 | ||||
900 | ContextTrieNode *CallerNode = Node; | |||
901 | LineLocation CalleeCallSite(0, 0); | |||
902 | if (CallerNode != &getRootContext()) { | |||
903 | // Record called target sample and its count | |||
904 | auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress); | |||
905 | if (LeafLoc) { | |||
906 | CallerNode->getFunctionSamples()->addCalledTargetSamples( | |||
907 | LeafLoc->Location.LineOffset, | |||
908 | getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, | |||
909 | Count); | |||
910 | // Record head sample for called target(callee) | |||
911 | CalleeCallSite = LeafLoc->Location; | |||
912 | } | |||
913 | } | |||
914 | ||||
915 | ContextTrieNode *CalleeNode = | |||
916 | CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName); | |||
917 | FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); | |||
918 | CalleeProfile->addHeadSamples(Count); | |||
919 | } | |||
920 | } | |||
921 | ||||
922 | void CSProfileGenerator::populateInferredFunctionSamples( | |||
923 | ContextTrieNode &Node) { | |||
924 | // There is no call jmp sample between the inliner and inlinee, we need to use | |||
925 | // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s | |||
926 | // sample depends on child(inlinee)'s sample, so traverse the tree in | |||
927 | // post-order. | |||
928 | for (auto &It : Node.getAllChildContext()) | |||
929 | populateInferredFunctionSamples(It.second); | |||
930 | ||||
931 | FunctionSamples *CalleeProfile = Node.getFunctionSamples(); | |||
932 | if (!CalleeProfile) | |||
933 | return; | |||
934 | // If we already have head sample counts, we must have value profile | |||
935 | // for call sites added already. Skip to avoid double counting. | |||
936 | if (CalleeProfile->getHeadSamples()) | |||
937 | return; | |||
938 | ContextTrieNode *CallerNode = Node.getParentContext(); | |||
939 | // If we don't have context, nothing to do for caller's call site. | |||
940 | // This could happen for entry point function. | |||
941 | if (CallerNode == &getRootContext()) | |||
942 | return; | |||
943 | ||||
944 | LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); | |||
945 | FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); | |||
946 | // Since we don't have call count for inlined functions, we | |||
947 | // estimate it from inlinee's profile using entry body sample. | |||
948 | uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); | |||
949 | // If we don't have samples with location, use 1 to indicate live. | |||
950 | if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) | |||
951 | EstimatedCallCount = 1; | |||
952 | CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, | |||
953 | CallerLeafFrameLoc.Discriminator, | |||
954 | Node.getFuncName(), EstimatedCallCount); | |||
955 | CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, | |||
956 | CallerLeafFrameLoc.Discriminator, | |||
957 | EstimatedCallCount); | |||
958 | CallerProfile.addTotalSamples(EstimatedCallCount); | |||
959 | } | |||
960 | ||||
961 | void CSProfileGenerator::convertToProfileMap( | |||
962 | ContextTrieNode &Node, SampleContextFrameVector &Context) { | |||
963 | FunctionSamples *FProfile = Node.getFunctionSamples(); | |||
964 | if (FProfile) { | |||
965 | Context.emplace_back(Node.getFuncName(), LineLocation(0, 0)); | |||
966 | // Save the new context for future references. | |||
967 | SampleContextFrames NewContext = *Contexts.insert(Context).first; | |||
968 | auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile)); | |||
969 | FunctionSamples &NewProfile = Ret.first->second; | |||
970 | NewProfile.getContext().setContext(NewContext); | |||
971 | Context.pop_back(); | |||
972 | } | |||
973 | ||||
974 | for (auto &It : Node.getAllChildContext()) { | |||
975 | ContextTrieNode &ChildNode = It.second; | |||
976 | Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc()); | |||
977 | convertToProfileMap(ChildNode, Context); | |||
978 | Context.pop_back(); | |||
979 | } | |||
980 | } | |||
981 | ||||
982 | void CSProfileGenerator::convertToProfileMap() { | |||
983 | assert(ProfileMap.empty() &&(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie" ) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 984, __extension__ __PRETTY_FUNCTION__)) | |||
984 | "ProfileMap should be empty before converting from the trie")(static_cast <bool> (ProfileMap.empty() && "ProfileMap should be empty before converting from the trie" ) ? void (0) : __assert_fail ("ProfileMap.empty() && \"ProfileMap should be empty before converting from the trie\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 984, __extension__ __PRETTY_FUNCTION__)); | |||
985 | assert(IsProfileValidOnTrie &&(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed" ) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 986, __extension__ __PRETTY_FUNCTION__)) | |||
986 | "Do not convert the trie twice, it's already destroyed")(static_cast <bool> (IsProfileValidOnTrie && "Do not convert the trie twice, it's already destroyed" ) ? void (0) : __assert_fail ("IsProfileValidOnTrie && \"Do not convert the trie twice, it's already destroyed\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 986, __extension__ __PRETTY_FUNCTION__)); | |||
987 | ||||
988 | SampleContextFrameVector Context; | |||
989 | for (auto &It : getRootContext().getAllChildContext()) | |||
990 | convertToProfileMap(It.second, Context); | |||
991 | ||||
992 | IsProfileValidOnTrie = false; | |||
993 | } | |||
994 | ||||
995 | void CSProfileGenerator::postProcessProfiles() { | |||
996 | // Compute hot/cold threshold based on profile. This will be used for cold | |||
997 | // context profile merging/trimming. | |||
998 | computeSummaryAndThreshold(); | |||
999 | ||||
1000 | // Run global pre-inliner to adjust/merge context profile based on estimated | |||
1001 | // inline decisions. | |||
1002 | if (EnableCSPreInliner) { | |||
1003 | ContextTracker.populateFuncToCtxtMap(); | |||
1004 | CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); | |||
1005 | // Turn off the profile merger by default unless it is explicitly enabled. | |||
1006 | if (!CSProfMergeColdContext.getNumOccurrences()) | |||
1007 | CSProfMergeColdContext = false; | |||
1008 | } | |||
1009 | ||||
1010 | convertToProfileMap(); | |||
1011 | ||||
1012 | // Trim and merge cold context profile using cold threshold above. | |||
1013 | if (TrimColdProfile || CSProfMergeColdContext) { | |||
1014 | SampleContextTrimmer(ProfileMap) | |||
1015 | .trimAndMergeColdContextProfiles( | |||
1016 | HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, | |||
1017 | CSProfMaxColdContextDepth, EnableCSPreInliner); | |||
1018 | } | |||
1019 | ||||
1020 | // Merge function samples of CS profile to calculate profile density. | |||
1021 | sampleprof::SampleProfileMap ContextLessProfiles; | |||
1022 | for (const auto &I : ProfileMap) { | |||
1023 | ContextLessProfiles[I.second.getName()].merge(I.second); | |||
1024 | } | |||
1025 | ||||
1026 | calculateAndShowDensity(ContextLessProfiles); | |||
1027 | if (GenCSNestedProfile) { | |||
1028 | ProfileConverter CSConverter(ProfileMap); | |||
1029 | CSConverter.convertCSProfiles(); | |||
1030 | FunctionSamples::ProfileIsCS = false; | |||
1031 | } | |||
1032 | } | |||
1033 | ||||
1034 | void ProfileGeneratorBase::computeSummaryAndThreshold( | |||
1035 | SampleProfileMap &Profiles) { | |||
1036 | SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); | |||
1037 | Summary = Builder.computeSummaryForProfiles(Profiles); | |||
1038 | HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( | |||
1039 | (Summary->getDetailedSummary())); | |||
1040 | ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( | |||
1041 | (Summary->getDetailedSummary())); | |||
1042 | } | |||
1043 | ||||
1044 | void CSProfileGenerator::computeSummaryAndThreshold() { | |||
1045 | // Always merge and use context-less profile map to compute summary. | |||
1046 | SampleProfileMap ContextLessProfiles; | |||
1047 | ContextTracker.createContextLessProfileMap(ContextLessProfiles); | |||
1048 | ||||
1049 | // Set the flag below to avoid merging the profile again in | |||
1050 | // computeSummaryAndThreshold | |||
1051 | FunctionSamples::ProfileIsCS = false; | |||
1052 | assert((static_cast <bool> ((!UseContextLessSummary.getNumOccurrences () || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile " "generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__ __PRETTY_FUNCTION__)) | |||
1053 | (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences () || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile " "generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__ __PRETTY_FUNCTION__)) | |||
1054 | "Don't set --profile-summary-contextless to false for profile "(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences () || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile " "generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__ __PRETTY_FUNCTION__)) | |||
1055 | "generation")(static_cast <bool> ((!UseContextLessSummary.getNumOccurrences () || UseContextLessSummary) && "Don't set --profile-summary-contextless to false for profile " "generation") ? void (0) : __assert_fail ("(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && \"Don't set --profile-summary-contextless to false for profile \" \"generation\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1055, __extension__ __PRETTY_FUNCTION__)); | |||
1056 | ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); | |||
1057 | // Recover the old value. | |||
1058 | FunctionSamples::ProfileIsCS = true; | |||
1059 | } | |||
1060 | ||||
1061 | void ProfileGeneratorBase::extractProbesFromRange( | |||
1062 | const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, | |||
1063 | bool FindDisjointRanges) { | |||
1064 | const RangeSample *PRanges = &RangeCounter; | |||
1065 | RangeSample Ranges; | |||
1066 | if (FindDisjointRanges) { | |||
1067 | findDisjointRanges(Ranges, RangeCounter); | |||
1068 | PRanges = &Ranges; | |||
1069 | } | |||
1070 | ||||
1071 | for (const auto &Range : *PRanges) { | |||
1072 | uint64_t RangeBegin = Range.first.first; | |||
1073 | uint64_t RangeEnd = Range.first.second; | |||
1074 | uint64_t Count = Range.second; | |||
1075 | ||||
1076 | InstructionPointer IP(Binary, RangeBegin, true); | |||
1077 | // Disjoint ranges may have range in the middle of two instr, | |||
1078 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
1079 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
1080 | if (IP.Address > RangeEnd) | |||
1081 | continue; | |||
1082 | ||||
1083 | do { | |||
1084 | const AddressProbesMap &Address2ProbesMap = | |||
1085 | Binary->getAddress2ProbesMap(); | |||
1086 | auto It = Address2ProbesMap.find(IP.Address); | |||
1087 | if (It != Address2ProbesMap.end()) { | |||
1088 | for (const auto &Probe : It->second) { | |||
1089 | ProbeCounter[&Probe] += Count; | |||
1090 | } | |||
1091 | } | |||
1092 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
1093 | } | |||
1094 | } | |||
1095 | ||||
1096 | static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, | |||
1097 | const SmallVectorImpl<uint64_t> &AddrVec, | |||
1098 | ProfiledBinary *Binary) { | |||
1099 | SmallVector<const MCDecodedPseudoProbe *, 16> Probes; | |||
1100 | for (auto Address : reverse(AddrVec)) { | |||
1101 | const MCDecodedPseudoProbe *CallProbe = | |||
1102 | Binary->getCallProbeForAddr(Address); | |||
1103 | // These could be the cases when a probe is not found at a calliste. Cutting | |||
1104 | // off the context from here since the inliner will not know how to consume | |||
1105 | // a context with unknown callsites. | |||
1106 | // 1. for functions that are not sampled when | |||
1107 | // --decode-probe-for-profiled-functions-only is on. | |||
1108 | // 2. for a merged callsite. Callsite merging may cause the loss of original | |||
1109 | // probe IDs. | |||
1110 | // 3. for an external callsite. | |||
1111 | if (!CallProbe) | |||
1112 | break; | |||
1113 | Probes.push_back(CallProbe); | |||
1114 | } | |||
1115 | ||||
1116 | std::reverse(Probes.begin(), Probes.end()); | |||
1117 | ||||
1118 | // Extract context stack for reusing, leaf context stack will be added | |||
1119 | // compressed while looking up function profile. | |||
1120 | for (const auto *P : Probes) { | |||
1121 | Binary->getInlineContextForProbe(P, ContextStack, true); | |||
1122 | } | |||
1123 | } | |||
1124 | ||||
1125 | void CSProfileGenerator::generateProbeBasedProfile() { | |||
1126 | // Enable pseudo probe functionalities in SampleProf | |||
1127 | FunctionSamples::ProfileIsProbeBased = true; | |||
1128 | for (const auto &CI : *SampleCounters) { | |||
1129 | const AddrBasedCtxKey *CtxKey = | |||
1130 | dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); | |||
1131 | // Fill in function body samples from probes, also infer caller's samples | |||
1132 | // from callee's probe | |||
1133 | populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey); | |||
1134 | // Fill in boundary samples for a call probe | |||
1135 | populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey); | |||
1136 | } | |||
1137 | } | |||
1138 | ||||
1139 | void CSProfileGenerator::populateBodySamplesWithProbes( | |||
1140 | const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) { | |||
1141 | ProbeCounterMap ProbeCounter; | |||
1142 | // Extract the top frame probes by looking up each address among the range in | |||
1143 | // the Address2ProbeMap | |||
1144 | extractProbesFromRange(RangeCounter, ProbeCounter); | |||
1145 | std::unordered_map<MCDecodedPseudoProbeInlineTree *, | |||
1146 | std::unordered_set<FunctionSamples *>> | |||
1147 | FrameSamples; | |||
1148 | for (const auto &PI : ProbeCounter) { | |||
1149 | const MCDecodedPseudoProbe *Probe = PI.first; | |||
1150 | uint64_t Count = PI.second; | |||
1151 | // Disjoint ranges have introduce zero-filled gap that | |||
1152 | // doesn't belong to current context, filter them out. | |||
1153 | if (!Probe->isBlock() || Count == 0) | |||
1154 | continue; | |||
1155 | ||||
1156 | ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe); | |||
1157 | FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); | |||
1158 | // Record the current frame and FunctionProfile whenever samples are | |||
1159 | // collected for non-danglie probes. This is for reporting all of the | |||
1160 | // zero count probes of the frame later. | |||
1161 | FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); | |||
1162 | FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); | |||
1163 | FunctionProfile.addTotalSamples(Count); | |||
1164 | if (Probe->isEntry()) { | |||
1165 | FunctionProfile.addHeadSamples(Count); | |||
1166 | // Look up for the caller's function profile | |||
1167 | const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); | |||
1168 | ContextTrieNode *CallerNode = ContextNode->getParentContext(); | |||
1169 | if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { | |||
1170 | // Since the context id will be compressed, we have to use callee's | |||
1171 | // context id to infer caller's context id to ensure they share the | |||
1172 | // same context prefix. | |||
1173 | uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; | |||
1174 | assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!" ) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1175, __extension__ __PRETTY_FUNCTION__)) | |||
1175 | "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!" ) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 1175, __extension__ __PRETTY_FUNCTION__)); | |||
1176 | FunctionSamples &CallerProfile = | |||
1177 | *getOrCreateFunctionSamples(CallerNode); | |||
1178 | CallerProfile.setFunctionHash(InlinerDesc->FuncHash); | |||
1179 | CallerProfile.addBodySamples(CallerIndex, 0, Count); | |||
1180 | CallerProfile.addTotalSamples(Count); | |||
1181 | CallerProfile.addCalledTargetSamples(CallerIndex, 0, | |||
1182 | ContextNode->getFuncName(), Count); | |||
1183 | } | |||
1184 | } | |||
1185 | } | |||
1186 | ||||
1187 | // Assign zero count for remaining probes without sample hits to | |||
1188 | // differentiate from probes optimized away, of which the counts are unknown | |||
1189 | // and will be inferred by the compiler. | |||
1190 | for (auto &I : FrameSamples) { | |||
1191 | for (auto *FunctionProfile : I.second) { | |||
1192 | for (auto *Probe : I.first->getProbes()) { | |||
1193 | FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); | |||
1194 | } | |||
1195 | } | |||
1196 | } | |||
1197 | } | |||
1198 | ||||
1199 | void CSProfileGenerator::populateBoundarySamplesWithProbes( | |||
1200 | const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) { | |||
1201 | for (const auto &BI : BranchCounter) { | |||
1202 | uint64_t SourceAddress = BI.first.first; | |||
1203 | uint64_t TargetAddress = BI.first.second; | |||
1204 | uint64_t Count = BI.second; | |||
1205 | const MCDecodedPseudoProbe *CallProbe = | |||
1206 | Binary->getCallProbeForAddr(SourceAddress); | |||
1207 | if (CallProbe == nullptr) | |||
1208 | continue; | |||
1209 | FunctionSamples &FunctionProfile = | |||
1210 | getFunctionProfileForLeafProbe(CtxKey, CallProbe); | |||
1211 | FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); | |||
1212 | FunctionProfile.addTotalSamples(Count); | |||
1213 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); | |||
1214 | if (CalleeName.size() == 0) | |||
1215 | continue; | |||
1216 | FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, | |||
1217 | Count); | |||
1218 | } | |||
1219 | } | |||
1220 | ||||
1221 | ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( | |||
1222 | const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { | |||
1223 | ||||
1224 | const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context; | |||
1225 | SmallVector<uint64_t, 16> NewContext; | |||
1226 | ||||
1227 | if (InferMissingFrames) { | |||
1228 | SmallVector<uint64_t, 16> Context = CtxKey->Context; | |||
1229 | // Append leaf frame for a complete inference. | |||
1230 | Context.push_back(LeafProbe->getAddress()); | |||
1231 | inferMissingFrames(Context, NewContext); | |||
1232 | // Pop out the leaf probe that was pushed in above. | |||
1233 | NewContext.pop_back(); | |||
1234 | PContext = &NewContext; | |||
1235 | } | |||
1236 | ||||
1237 | SampleContextFrameVector ContextStack; | |||
1238 | extractPrefixContextStack(ContextStack, *PContext, Binary); | |||
| ||||
1239 | ||||
1240 | // Explicitly copy the context for appending the leaf context | |||
1241 | SampleContextFrameVector NewContextStack(ContextStack.begin(), | |||
1242 | ContextStack.end()); | |||
1243 | Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); | |||
1244 | // For leaf inlined context with the top frame, we should strip off the top | |||
1245 | // frame's probe id, like: | |||
1246 | // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" | |||
1247 | auto LeafFrame = NewContextStack.back(); | |||
1248 | LeafFrame.Location = LineLocation(0, 0); | |||
1249 | NewContextStack.pop_back(); | |||
1250 | // Compress the context string except for the leaf frame | |||
1251 | CSProfileGenerator::compressRecursionContext(NewContextStack); | |||
1252 | CSProfileGenerator::trimContext(NewContextStack); | |||
1253 | NewContextStack.push_back(LeafFrame); | |||
1254 | ||||
1255 | const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); | |||
1256 | bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); | |||
1257 | ContextTrieNode *ContextNode = | |||
1258 | getOrCreateContextNode(NewContextStack, WasLeafInlined); | |||
1259 | ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); | |||
1260 | return ContextNode; | |||
1261 | } | |||
1262 | ||||
1263 | FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( | |||
1264 | const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { | |||
1265 | return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples(); | |||
1266 | } | |||
1267 | ||||
1268 | } // end namespace sampleprof | |||
1269 | } // end namespace llvm |