File: | llvm/tools/llvm-profgen/ProfileGenerator.cpp |
Warning: | line 826, column 5 Forming reference to null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "ProfileGenerator.h" | |||
10 | #include "ErrorHandling.h" | |||
11 | #include "ProfiledBinary.h" | |||
12 | #include "llvm/ProfileData/ProfileCommon.h" | |||
13 | #include <float.h> | |||
14 | #include <unordered_set> | |||
15 | ||||
16 | cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), | |||
17 | cl::Required, | |||
18 | cl::desc("Output profile file")); | |||
19 | static cl::alias OutputA("o", cl::desc("Alias for --output"), | |||
20 | cl::aliasopt(OutputFilename)); | |||
21 | ||||
22 | static cl::opt<SampleProfileFormat> OutputFormat( | |||
23 | "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), | |||
24 | cl::values( | |||
25 | clEnumValN(SPF_Binary, "binary", "Binary encoding (default)")llvm::cl::OptionEnumValue { "binary", int(SPF_Binary), "Binary encoding (default)" }, | |||
26 | clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding")llvm::cl::OptionEnumValue { "compbinary", int(SPF_Compact_Binary ), "Compact binary encoding" }, | |||
27 | clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding")llvm::cl::OptionEnumValue { "extbinary", int(SPF_Ext_Binary), "Extensible binary encoding" }, | |||
28 | clEnumValN(SPF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(SPF_Text), "Text encoding" }, | |||
29 | clEnumValN(SPF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)" } | |||
30 | "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(SPF_GCC), "GCC encoding (only meaningful for -sample)" })); | |||
31 | ||||
32 | cl::opt<bool> UseMD5( | |||
33 | "use-md5", cl::init(false), cl::Hidden, | |||
34 | cl::desc("Use md5 to represent function names in the output profile (only " | |||
35 | "meaningful for -extbinary)")); | |||
36 | ||||
37 | static cl::opt<bool> PopulateProfileSymbolList( | |||
38 | "populate-profile-symbol-list", cl::init(false), cl::Hidden, | |||
39 | cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); | |||
40 | ||||
41 | static cl::opt<bool> FillZeroForAllFuncs( | |||
42 | "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, | |||
43 | cl::desc("Attribute all functions' range with zero count " | |||
44 | "even it's not hit by any samples.")); | |||
45 | ||||
46 | static cl::opt<int32_t, true> RecursionCompression( | |||
47 | "compress-recursion", | |||
48 | cl::desc("Compressing recursion by deduplicating adjacent frame " | |||
49 | "sequences up to the specified size. -1 means no size limit."), | |||
50 | cl::Hidden, | |||
51 | cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); | |||
52 | ||||
53 | static cl::opt<bool> | |||
54 | TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore, | |||
55 | cl::desc("If the total count of the profile is smaller " | |||
56 | "than threshold, it will be trimmed.")); | |||
57 | ||||
58 | static cl::opt<bool> CSProfMergeColdContext( | |||
59 | "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, | |||
60 | cl::desc("If the total count of context profile is smaller than " | |||
61 | "the threshold, it will be merged into context-less base " | |||
62 | "profile.")); | |||
63 | ||||
64 | static cl::opt<uint32_t> CSProfMaxColdContextDepth( | |||
65 | "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, | |||
66 | cl::desc("Keep the last K contexts while merging cold profile. 1 means the " | |||
67 | "context-less base profile")); | |||
68 | ||||
69 | static cl::opt<int, true> CSProfMaxContextDepth( | |||
70 | "csprof-max-context-depth", cl::ZeroOrMore, | |||
71 | cl::desc("Keep the last K contexts while merging profile. -1 means no " | |||
72 | "depth limit."), | |||
73 | cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); | |||
74 | ||||
75 | static cl::opt<double> HotFunctionDensityThreshold( | |||
76 | "hot-function-density-threshold", llvm::cl::init(1000), | |||
77 | llvm::cl::desc( | |||
78 | "specify density threshold for hot functions (default: 1000)"), | |||
79 | llvm::cl::Optional); | |||
80 | static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), | |||
81 | llvm::cl::desc("show profile density details"), | |||
82 | llvm::cl::Optional); | |||
83 | ||||
84 | static cl::opt<bool> UpdateTotalSamples( | |||
85 | "update-total-samples", llvm::cl::init(false), | |||
86 | llvm::cl::desc( | |||
87 | "Update total samples by accumulating all its body samples."), | |||
88 | llvm::cl::Optional); | |||
89 | ||||
90 | extern cl::opt<int> ProfileSummaryCutoffHot; | |||
91 | ||||
92 | static cl::opt<bool> GenCSNestedProfile( | |||
93 | "gen-cs-nested-profile", cl::Hidden, cl::init(false), | |||
94 | cl::desc("Generate nested function profiles for CSSPGO")); | |||
95 | ||||
96 | using namespace llvm; | |||
97 | using namespace sampleprof; | |||
98 | ||||
99 | namespace llvm { | |||
100 | namespace sampleprof { | |||
101 | ||||
102 | // Initialize the MaxCompressionSize to -1 which means no size limit | |||
103 | int32_t CSProfileGenerator::MaxCompressionSize = -1; | |||
104 | ||||
105 | int CSProfileGenerator::MaxContextDepth = -1; | |||
106 | ||||
107 | bool ProfileGeneratorBase::UseFSDiscriminator = false; | |||
108 | ||||
109 | std::unique_ptr<ProfileGeneratorBase> | |||
110 | ProfileGeneratorBase::create(ProfiledBinary *Binary, | |||
111 | const ContextSampleCounterMap &SampleCounters, | |||
112 | bool ProfileIsCSFlat) { | |||
113 | std::unique_ptr<ProfileGeneratorBase> Generator; | |||
114 | if (ProfileIsCSFlat) { | |||
115 | if (Binary->useFSDiscriminator()) | |||
116 | exitWithError("FS discriminator is not supported in CS profile."); | |||
117 | Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); | |||
118 | } else { | |||
119 | Generator.reset(new ProfileGenerator(Binary, SampleCounters)); | |||
120 | } | |||
121 | ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); | |||
122 | FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); | |||
123 | ||||
124 | return Generator; | |||
125 | } | |||
126 | ||||
127 | void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, | |||
128 | SampleProfileMap &ProfileMap) { | |||
129 | // Populate profile symbol list if extended binary format is used. | |||
130 | ProfileSymbolList SymbolList; | |||
131 | ||||
132 | if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { | |||
133 | Binary->populateSymbolListFromDWARF(SymbolList); | |||
134 | Writer->setProfileSymbolList(&SymbolList); | |||
135 | } | |||
136 | ||||
137 | if (std::error_code EC = Writer->write(ProfileMap)) | |||
138 | exitWithError(std::move(EC)); | |||
139 | } | |||
140 | ||||
141 | void ProfileGeneratorBase::write() { | |||
142 | auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); | |||
143 | if (std::error_code EC = WriterOrErr.getError()) | |||
144 | exitWithError(EC, OutputFilename); | |||
145 | ||||
146 | if (UseMD5) { | |||
147 | if (OutputFormat != SPF_Ext_Binary) | |||
148 | WithColor::warning() << "-use-md5 is ignored. Specify " | |||
149 | "--format=extbinary to enable it\n"; | |||
150 | else | |||
151 | WriterOrErr.get()->setUseMD5(); | |||
152 | } | |||
153 | ||||
154 | write(std::move(WriterOrErr.get()), ProfileMap); | |||
155 | } | |||
156 | ||||
157 | void ProfileGeneratorBase::showDensitySuggestion(double Density) { | |||
158 | if (Density == 0.0) | |||
159 | WithColor::warning() << "The --profile-summary-cutoff-hot option may be " | |||
160 | "set too low. Please check your command.\n"; | |||
161 | else if (Density < HotFunctionDensityThreshold) | |||
162 | WithColor::warning() | |||
163 | << "AutoFDO is estimated to optimize better with " | |||
164 | << format("%.1f", HotFunctionDensityThreshold / Density) | |||
165 | << "x more samples. Please consider increasing sampling rate or " | |||
166 | "profiling for longer duration to get more samples.\n"; | |||
167 | ||||
168 | if (ShowDensity) | |||
169 | outs() << "Minimum profile density for hot functions with top " | |||
170 | << format("%.2f", | |||
171 | static_cast<double>(ProfileSummaryCutoffHot.getValue()) / | |||
172 | 10000) | |||
173 | << "% total samples: " << format("%.1f", Density) << "\n"; | |||
174 | } | |||
175 | ||||
176 | double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, | |||
177 | uint64_t HotCntThreshold) { | |||
178 | double Density = DBL_MAX1.7976931348623157e+308; | |||
179 | std::vector<const FunctionSamples *> HotFuncs; | |||
180 | for (auto &I : Profiles) { | |||
181 | auto &FuncSamples = I.second; | |||
182 | if (FuncSamples.getTotalSamples() < HotCntThreshold) | |||
183 | continue; | |||
184 | HotFuncs.emplace_back(&FuncSamples); | |||
185 | } | |||
186 | ||||
187 | for (auto *FuncSamples : HotFuncs) { | |||
188 | auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); | |||
189 | if (!Func) | |||
190 | continue; | |||
191 | uint64_t FuncSize = Func->getFuncSize(); | |||
192 | if (FuncSize == 0) | |||
193 | continue; | |||
194 | Density = | |||
195 | std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) / | |||
196 | FuncSize); | |||
197 | } | |||
198 | ||||
199 | return Density == DBL_MAX1.7976931348623157e+308 ? 0.0 : Density; | |||
200 | } | |||
201 | ||||
202 | void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, | |||
203 | const RangeSample &Ranges) { | |||
204 | ||||
205 | /* | |||
206 | Regions may overlap with each other. Using the boundary info, find all | |||
207 | disjoint ranges and their sample count. BoundaryPoint contains the count | |||
208 | multiple samples begin/end at this points. | |||
209 | ||||
210 | |<--100-->| Sample1 | |||
211 | |<------200------>| Sample2 | |||
212 | A B C | |||
213 | ||||
214 | In the example above, | |||
215 | Sample1 begins at A, ends at B, its value is 100. | |||
216 | Sample2 beings at A, ends at C, its value is 200. | |||
217 | For A, BeginCount is the sum of sample begins at A, which is 300 and no | |||
218 | samples ends at A, so EndCount is 0. | |||
219 | Then boundary points A, B, and C with begin/end counts are: | |||
220 | A: (300, 0) | |||
221 | B: (0, 100) | |||
222 | C: (0, 200) | |||
223 | */ | |||
224 | struct BoundaryPoint { | |||
225 | // Sum of sample counts beginning at this point | |||
226 | uint64_t BeginCount = UINT64_MAX(18446744073709551615UL); | |||
227 | // Sum of sample counts ending at this point | |||
228 | uint64_t EndCount = UINT64_MAX(18446744073709551615UL); | |||
229 | // Is the begin point of a zero range. | |||
230 | bool IsZeroRangeBegin = false; | |||
231 | // Is the end point of a zero range. | |||
232 | bool IsZeroRangeEnd = false; | |||
233 | ||||
234 | void addBeginCount(uint64_t Count) { | |||
235 | if (BeginCount == UINT64_MAX(18446744073709551615UL)) | |||
236 | BeginCount = 0; | |||
237 | BeginCount += Count; | |||
238 | } | |||
239 | ||||
240 | void addEndCount(uint64_t Count) { | |||
241 | if (EndCount == UINT64_MAX(18446744073709551615UL)) | |||
242 | EndCount = 0; | |||
243 | EndCount += Count; | |||
244 | } | |||
245 | }; | |||
246 | ||||
247 | /* | |||
248 | For the above example. With boundary points, follwing logic finds two | |||
249 | disjoint region of | |||
250 | ||||
251 | [A,B]: 300 | |||
252 | [B+1,C]: 200 | |||
253 | ||||
254 | If there is a boundary point that both begin and end, the point itself | |||
255 | becomes a separate disjoint region. For example, if we have original | |||
256 | ranges of | |||
257 | ||||
258 | |<--- 100 --->| | |||
259 | |<--- 200 --->| | |||
260 | A B C | |||
261 | ||||
262 | there are three boundary points with their begin/end counts of | |||
263 | ||||
264 | A: (100, 0) | |||
265 | B: (200, 100) | |||
266 | C: (0, 200) | |||
267 | ||||
268 | the disjoint ranges would be | |||
269 | ||||
270 | [A, B-1]: 100 | |||
271 | [B, B]: 300 | |||
272 | [B+1, C]: 200. | |||
273 | ||||
274 | Example for zero value range: | |||
275 | ||||
276 | |<--- 100 --->| | |||
277 | |<--- 200 --->| | |||
278 | |<--------------- 0 ----------------->| | |||
279 | A B C D E F | |||
280 | ||||
281 | [A, B-1] : 0 | |||
282 | [B, C] : 100 | |||
283 | [C+1, D-1]: 0 | |||
284 | [D, E] : 200 | |||
285 | [E+1, F] : 0 | |||
286 | */ | |||
287 | std::map<uint64_t, BoundaryPoint> Boundaries; | |||
288 | ||||
289 | for (const auto &Item : Ranges) { | |||
290 | assert(Item.first.first <= Item.first.second &&(static_cast <bool> (Item.first.first <= Item.first. second && "Invalid instruction range") ? void (0) : __assert_fail ("Item.first.first <= Item.first.second && \"Invalid instruction range\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 291, __extension__ __PRETTY_FUNCTION__)) | |||
291 | "Invalid instruction range")(static_cast <bool> (Item.first.first <= Item.first. second && "Invalid instruction range") ? void (0) : __assert_fail ("Item.first.first <= Item.first.second && \"Invalid instruction range\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 291, __extension__ __PRETTY_FUNCTION__)); | |||
292 | auto &BeginPoint = Boundaries[Item.first.first]; | |||
293 | auto &EndPoint = Boundaries[Item.first.second]; | |||
294 | uint64_t Count = Item.second; | |||
295 | ||||
296 | BeginPoint.addBeginCount(Count); | |||
297 | EndPoint.addEndCount(Count); | |||
298 | if (Count == 0) { | |||
299 | BeginPoint.IsZeroRangeBegin = true; | |||
300 | EndPoint.IsZeroRangeEnd = true; | |||
301 | } | |||
302 | } | |||
303 | ||||
304 | // Use UINT64_MAX to indicate there is no existing range between BeginAddress | |||
305 | // and the next valid address | |||
306 | uint64_t BeginAddress = UINT64_MAX(18446744073709551615UL); | |||
307 | int ZeroRangeDepth = 0; | |||
308 | uint64_t Count = 0; | |||
309 | for (const auto &Item : Boundaries) { | |||
310 | uint64_t Address = Item.first; | |||
311 | const BoundaryPoint &Point = Item.second; | |||
312 | if (Point.BeginCount != UINT64_MAX(18446744073709551615UL)) { | |||
313 | if (BeginAddress != UINT64_MAX(18446744073709551615UL)) | |||
314 | DisjointRanges[{BeginAddress, Address - 1}] = Count; | |||
315 | Count += Point.BeginCount; | |||
316 | BeginAddress = Address; | |||
317 | ZeroRangeDepth += Point.IsZeroRangeBegin; | |||
318 | } | |||
319 | if (Point.EndCount != UINT64_MAX(18446744073709551615UL)) { | |||
320 | assert((BeginAddress != UINT64_MAX) &&(static_cast <bool> ((BeginAddress != (18446744073709551615UL )) && "First boundary point cannot be 'end' point") ? void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 321, __extension__ __PRETTY_FUNCTION__)) | |||
321 | "First boundary point cannot be 'end' point")(static_cast <bool> ((BeginAddress != (18446744073709551615UL )) && "First boundary point cannot be 'end' point") ? void (0) : __assert_fail ("(BeginAddress != UINT64_MAX) && \"First boundary point cannot be 'end' point\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 321, __extension__ __PRETTY_FUNCTION__)); | |||
322 | DisjointRanges[{BeginAddress, Address}] = Count; | |||
323 | assert(Count >= Point.EndCount && "Mismatched live ranges")(static_cast <bool> (Count >= Point.EndCount && "Mismatched live ranges") ? void (0) : __assert_fail ("Count >= Point.EndCount && \"Mismatched live ranges\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 323, __extension__ __PRETTY_FUNCTION__)); | |||
324 | Count -= Point.EndCount; | |||
325 | BeginAddress = Address + 1; | |||
326 | ZeroRangeDepth -= Point.IsZeroRangeEnd; | |||
327 | // If the remaining count is zero and it's no longer in a zero range, this | |||
328 | // means we consume all the ranges before, thus mark BeginAddress as | |||
329 | // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: | |||
330 | // [<---- 10 ---->] | |||
331 | // [<---- 20 ---->] | |||
332 | // A B C D | |||
333 | // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't | |||
334 | // have the [B+1, C-1] zero range. | |||
335 | if (Count == 0 && ZeroRangeDepth == 0) | |||
336 | BeginAddress = UINT64_MAX(18446744073709551615UL); | |||
337 | } | |||
338 | } | |||
339 | } | |||
340 | ||||
341 | void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( | |||
342 | FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, | |||
343 | uint64_t Count) { | |||
344 | // Use the maximum count of samples with same line location | |||
345 | uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); | |||
346 | ||||
347 | // Use duplication factor to compensated for loop unroll/vectorization. | |||
348 | // Note that this is only needed when we're taking MAX of the counts at | |||
349 | // the location instead of SUM. | |||
350 | Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); | |||
351 | ||||
352 | ErrorOr<uint64_t> R = | |||
353 | FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); | |||
354 | ||||
355 | uint64_t PreviousCount = R ? R.get() : 0; | |||
356 | if (PreviousCount <= Count) { | |||
357 | FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, | |||
358 | Count - PreviousCount); | |||
359 | } | |||
360 | } | |||
361 | ||||
362 | void ProfileGeneratorBase::updateTotalSamples() { | |||
363 | if (!UpdateTotalSamples) | |||
364 | return; | |||
365 | ||||
366 | for (auto &Item : ProfileMap) { | |||
367 | FunctionSamples &FunctionProfile = Item.second; | |||
368 | FunctionProfile.updateTotalSamples(); | |||
369 | } | |||
370 | } | |||
371 | ||||
372 | FunctionSamples & | |||
373 | ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { | |||
374 | SampleContext Context(FuncName); | |||
375 | auto Ret = ProfileMap.emplace(Context, FunctionSamples()); | |||
376 | if (Ret.second) { | |||
377 | FunctionSamples &FProfile = Ret.first->second; | |||
378 | FProfile.setContext(Context); | |||
379 | } | |||
380 | return Ret.first->second; | |||
381 | } | |||
382 | ||||
383 | void ProfileGenerator::generateProfile() { | |||
384 | if (Binary->usePseudoProbes()) { | |||
385 | // TODO: Support probe based profile generation | |||
386 | exitWithError("Probe based profile generation not supported for AutoFDO, " | |||
387 | "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`."); | |||
388 | } else { | |||
389 | generateLineNumBasedProfile(); | |||
390 | } | |||
391 | postProcessProfiles(); | |||
392 | } | |||
393 | ||||
394 | void ProfileGenerator::postProcessProfiles() { | |||
395 | computeSummaryAndThreshold(); | |||
396 | trimColdProfiles(ProfileMap, ColdCountThreshold); | |||
397 | calculateAndShowDensity(ProfileMap); | |||
398 | } | |||
399 | ||||
400 | void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, | |||
401 | uint64_t ColdCntThreshold) { | |||
402 | if (!TrimColdProfile) | |||
403 | return; | |||
404 | ||||
405 | // Move cold profiles into a tmp container. | |||
406 | std::vector<SampleContext> ColdProfiles; | |||
407 | for (const auto &I : ProfileMap) { | |||
408 | if (I.second.getTotalSamples() < ColdCntThreshold) | |||
409 | ColdProfiles.emplace_back(I.first); | |||
410 | } | |||
411 | ||||
412 | // Remove the cold profile from ProfileMap. | |||
413 | for (const auto &I : ColdProfiles) | |||
414 | ProfileMap.erase(I); | |||
415 | } | |||
416 | ||||
417 | void ProfileGenerator::generateLineNumBasedProfile() { | |||
418 | assert(SampleCounters.size() == 1 &&(static_cast <bool> (SampleCounters.size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters.size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 419, __extension__ __PRETTY_FUNCTION__)) | |||
419 | "Must have one entry for profile generation.")(static_cast <bool> (SampleCounters.size() == 1 && "Must have one entry for profile generation.") ? void (0) : __assert_fail ("SampleCounters.size() == 1 && \"Must have one entry for profile generation.\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 419, __extension__ __PRETTY_FUNCTION__)); | |||
420 | const SampleCounter &SC = SampleCounters.begin()->second; | |||
421 | // Fill in function body samples | |||
422 | populateBodySamplesForAllFunctions(SC.RangeCounter); | |||
423 | // Fill in boundary sample counts as well as call site samples for calls | |||
424 | populateBoundarySamplesForAllFunctions(SC.BranchCounter); | |||
425 | ||||
426 | updateTotalSamples(); | |||
427 | } | |||
428 | ||||
429 | FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( | |||
430 | const SampleContextFrameVector &FrameVec, uint64_t Count) { | |||
431 | // Get top level profile | |||
432 | FunctionSamples *FunctionProfile = | |||
433 | &getTopLevelFunctionProfile(FrameVec[0].FuncName); | |||
434 | FunctionProfile->addTotalSamples(Count); | |||
435 | ||||
436 | for (size_t I = 1; I < FrameVec.size(); I++) { | |||
437 | LineLocation Callsite( | |||
438 | FrameVec[I - 1].Location.LineOffset, | |||
439 | getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); | |||
440 | FunctionSamplesMap &SamplesMap = | |||
441 | FunctionProfile->functionSamplesAt(Callsite); | |||
442 | auto Ret = | |||
443 | SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); | |||
444 | if (Ret.second) { | |||
445 | SampleContext Context(FrameVec[I].FuncName); | |||
446 | Ret.first->second.setContext(Context); | |||
447 | } | |||
448 | FunctionProfile = &Ret.first->second; | |||
449 | FunctionProfile->addTotalSamples(Count); | |||
450 | } | |||
451 | ||||
452 | return *FunctionProfile; | |||
453 | } | |||
454 | ||||
455 | RangeSample | |||
456 | ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { | |||
457 | RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); | |||
458 | if (FillZeroForAllFuncs) { | |||
459 | for (auto &FuncI : Binary->getAllBinaryFunctions()) { | |||
460 | for (auto &R : FuncI.second.Ranges) { | |||
461 | Ranges[{R.first, R.second - 1}] += 0; | |||
462 | } | |||
463 | } | |||
464 | } else { | |||
465 | // For each range, we search for all ranges of the function it belongs to | |||
466 | // and initialize it with zero count, so it remains zero if doesn't hit any | |||
467 | // samples. This is to be consistent with compiler that interpret zero count | |||
468 | // as unexecuted(cold). | |||
469 | for (const auto &I : RangeCounter) { | |||
470 | uint64_t StartOffset = I.first.first; | |||
471 | for (const auto &Range : Binary->getRangesForOffset(StartOffset)) | |||
472 | Ranges[{Range.first, Range.second - 1}] += 0; | |||
473 | } | |||
474 | } | |||
475 | RangeSample DisjointRanges; | |||
476 | findDisjointRanges(DisjointRanges, Ranges); | |||
477 | return DisjointRanges; | |||
478 | } | |||
479 | ||||
480 | void ProfileGenerator::populateBodySamplesForAllFunctions( | |||
481 | const RangeSample &RangeCounter) { | |||
482 | for (const auto &Range : preprocessRangeCounter(RangeCounter)) { | |||
483 | uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); | |||
484 | uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); | |||
485 | uint64_t Count = Range.second; | |||
486 | ||||
487 | InstructionPointer IP(Binary, RangeBegin, true); | |||
488 | // Disjoint ranges may have range in the middle of two instr, | |||
489 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
490 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
491 | if (IP.Address > RangeEnd) | |||
492 | continue; | |||
493 | ||||
494 | do { | |||
495 | uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); | |||
496 | const SampleContextFrameVector &FrameVec = | |||
497 | Binary->getFrameLocationStack(Offset); | |||
498 | if (!FrameVec.empty()) { | |||
499 | // FIXME: As accumulating total count per instruction caused some | |||
500 | // regression, we changed to accumulate total count per byte as a | |||
501 | // workaround. Tuning hotness threshold on the compiler side might be | |||
502 | // necessary in the future. | |||
503 | FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( | |||
504 | FrameVec, Count * Binary->getInstSize(Offset)); | |||
505 | updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), | |||
506 | Count); | |||
507 | } | |||
508 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
509 | } | |||
510 | } | |||
511 | ||||
512 | StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { | |||
513 | // Get the function range by branch target if it's a call branch. | |||
514 | auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); | |||
515 | ||||
516 | // We won't accumulate sample count for a range whose start is not the real | |||
517 | // function entry such as outlined function or inner labels. | |||
518 | if (!FRange || !FRange->IsFuncEntry) | |||
519 | return StringRef(); | |||
520 | ||||
521 | return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); | |||
522 | } | |||
523 | ||||
524 | void ProfileGenerator::populateBoundarySamplesForAllFunctions( | |||
525 | const BranchSample &BranchCounters) { | |||
526 | for (const auto &Entry : BranchCounters) { | |||
527 | uint64_t SourceOffset = Entry.first.first; | |||
528 | uint64_t TargetOffset = Entry.first.second; | |||
529 | uint64_t Count = Entry.second; | |||
530 | assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch" ) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 530, __extension__ __PRETTY_FUNCTION__)); | |||
531 | ||||
532 | StringRef CalleeName = getCalleeNameForOffset(TargetOffset); | |||
533 | if (CalleeName.size() == 0) | |||
534 | continue; | |||
535 | // Record called target sample and its count. | |||
536 | const SampleContextFrameVector &FrameVec = | |||
537 | Binary->getFrameLocationStack(SourceOffset); | |||
538 | if (!FrameVec.empty()) { | |||
539 | FunctionSamples &FunctionProfile = | |||
540 | getLeafProfileAndAddTotalSamples(FrameVec, 0); | |||
541 | FunctionProfile.addCalledTargetSamples( | |||
542 | FrameVec.back().Location.LineOffset, | |||
543 | getBaseDiscriminator(FrameVec.back().Location.Discriminator), | |||
544 | CalleeName, Count); | |||
545 | } | |||
546 | // Add head samples for callee. | |||
547 | FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); | |||
548 | CalleeProfile.addHeadSamples(Count); | |||
549 | } | |||
550 | } | |||
551 | ||||
552 | void ProfileGeneratorBase::calculateAndShowDensity( | |||
553 | const SampleProfileMap &Profiles) { | |||
554 | double Density = calculateDensity(Profiles, HotCountThreshold); | |||
555 | showDensitySuggestion(Density); | |||
556 | } | |||
557 | ||||
558 | FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( | |||
559 | const SampleContextFrameVector &Context, bool WasLeafInlined) { | |||
560 | auto I = ProfileMap.find(SampleContext(Context)); | |||
561 | if (I == ProfileMap.end()) { | |||
562 | // Save the new context for future references. | |||
563 | SampleContextFrames NewContext = *Contexts.insert(Context).first; | |||
564 | SampleContext FContext(NewContext, RawContext); | |||
565 | auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); | |||
566 | if (WasLeafInlined) | |||
567 | FContext.setAttribute(ContextWasInlined); | |||
568 | FunctionSamples &FProfile = Ret.first->second; | |||
569 | FProfile.setContext(FContext); | |||
570 | return Ret.first->second; | |||
571 | } | |||
572 | return I->second; | |||
573 | } | |||
574 | ||||
575 | void CSProfileGenerator::generateProfile() { | |||
576 | FunctionSamples::ProfileIsCSFlat = true; | |||
577 | ||||
578 | if (Binary->getTrackFuncContextSize()) | |||
| ||||
579 | computeSizeForProfiledFunctions(); | |||
580 | ||||
581 | if (Binary->usePseudoProbes()) { | |||
582 | // Enable pseudo probe functionalities in SampleProf | |||
583 | FunctionSamples::ProfileIsProbeBased = true; | |||
584 | generateProbeBasedProfile(); | |||
585 | } else { | |||
586 | generateLineNumBasedProfile(); | |||
587 | } | |||
588 | postProcessProfiles(); | |||
589 | } | |||
590 | ||||
591 | void CSProfileGenerator::computeSizeForProfiledFunctions() { | |||
592 | // Hash map to deduplicate the function range and the item is a pair of | |||
593 | // function start and end offset. | |||
594 | std::unordered_map<uint64_t, uint64_t> AggregatedRanges; | |||
595 | // Go through all the ranges in the CS counters, use the start of the range to | |||
596 | // look up the function it belongs and record the function range. | |||
597 | for (const auto &CI : SampleCounters) { | |||
598 | for (const auto &Item : CI.second.RangeCounter) { | |||
599 | // FIXME: Filter the bogus crossing function range. | |||
600 | uint64_t StartOffset = Item.first.first; | |||
601 | // Note that a function can be spilt into multiple ranges, so get all | |||
602 | // ranges of the function. | |||
603 | for (const auto &Range : Binary->getRangesForOffset(StartOffset)) | |||
604 | AggregatedRanges[Range.first] = Range.second; | |||
605 | } | |||
606 | } | |||
607 | ||||
608 | for (const auto &I : AggregatedRanges) { | |||
609 | uint64_t StartOffset = I.first; | |||
610 | uint64_t EndOffset = I.second; | |||
611 | Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); | |||
612 | } | |||
613 | } | |||
614 | ||||
615 | void CSProfileGenerator::generateLineNumBasedProfile() { | |||
616 | for (const auto &CI : SampleCounters) { | |||
617 | const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); | |||
618 | ||||
619 | // Get or create function profile for the range | |||
620 | FunctionSamples &FunctionProfile = | |||
621 | getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); | |||
622 | ||||
623 | // Fill in function body samples | |||
624 | populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); | |||
625 | // Fill in boundary sample counts as well as call site samples for calls | |||
626 | populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, | |||
627 | CI.second.BranchCounter); | |||
628 | } | |||
629 | // Fill in call site value sample for inlined calls and also use context to | |||
630 | // infer missing samples. Since we don't have call count for inlined | |||
631 | // functions, we estimate it from inlinee's profile using the entry of the | |||
632 | // body sample. | |||
633 | populateInferredFunctionSamples(); | |||
634 | ||||
635 | updateTotalSamples(); | |||
636 | } | |||
637 | ||||
638 | void CSProfileGenerator::populateBodySamplesForFunction( | |||
639 | FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { | |||
640 | // Compute disjoint ranges first, so we can use MAX | |||
641 | // for calculating count for each location. | |||
642 | RangeSample Ranges; | |||
643 | findDisjointRanges(Ranges, RangeCounter); | |||
644 | for (const auto &Range : Ranges) { | |||
645 | uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); | |||
646 | uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); | |||
647 | uint64_t Count = Range.second; | |||
648 | // Disjoint ranges have introduce zero-filled gap that | |||
649 | // doesn't belong to current context, filter them out. | |||
650 | if (Count == 0) | |||
651 | continue; | |||
652 | ||||
653 | InstructionPointer IP(Binary, RangeBegin, true); | |||
654 | // Disjoint ranges may have range in the middle of two instr, | |||
655 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
656 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
657 | if (IP.Address > RangeEnd) | |||
658 | continue; | |||
659 | ||||
660 | do { | |||
661 | uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); | |||
662 | auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); | |||
663 | if (LeafLoc.hasValue()) { | |||
664 | // Recording body sample for this specific context | |||
665 | updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); | |||
666 | FunctionProfile.addTotalSamples(Count); | |||
667 | } | |||
668 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
669 | } | |||
670 | } | |||
671 | ||||
672 | void CSProfileGenerator::populateBoundarySamplesForFunction( | |||
673 | SampleContextFrames ContextId, FunctionSamples &FunctionProfile, | |||
674 | const BranchSample &BranchCounters) { | |||
675 | ||||
676 | for (const auto &Entry : BranchCounters) { | |||
677 | uint64_t SourceOffset = Entry.first.first; | |||
678 | uint64_t TargetOffset = Entry.first.second; | |||
679 | uint64_t Count = Entry.second; | |||
680 | assert(Count != 0 && "Unexpected zero weight branch")(static_cast <bool> (Count != 0 && "Unexpected zero weight branch" ) ? void (0) : __assert_fail ("Count != 0 && \"Unexpected zero weight branch\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 680, __extension__ __PRETTY_FUNCTION__)); | |||
681 | ||||
682 | StringRef CalleeName = getCalleeNameForOffset(TargetOffset); | |||
683 | if (CalleeName.size() == 0) | |||
684 | continue; | |||
685 | ||||
686 | // Record called target sample and its count | |||
687 | auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); | |||
688 | if (!LeafLoc.hasValue()) | |||
689 | continue; | |||
690 | FunctionProfile.addCalledTargetSamples( | |||
691 | LeafLoc->Location.LineOffset, | |||
692 | getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, | |||
693 | Count); | |||
694 | ||||
695 | // Record head sample for called target(callee) | |||
696 | SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); | |||
697 | assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&(static_cast <bool> (CalleeCtx.back().FuncName == LeafLoc ->FuncName && "Leaf function name doesn't match") ? void (0) : __assert_fail ("CalleeCtx.back().FuncName == LeafLoc->FuncName && \"Leaf function name doesn't match\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 698, __extension__ __PRETTY_FUNCTION__)) | |||
698 | "Leaf function name doesn't match")(static_cast <bool> (CalleeCtx.back().FuncName == LeafLoc ->FuncName && "Leaf function name doesn't match") ? void (0) : __assert_fail ("CalleeCtx.back().FuncName == LeafLoc->FuncName && \"Leaf function name doesn't match\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 698, __extension__ __PRETTY_FUNCTION__)); | |||
699 | CalleeCtx.back() = *LeafLoc; | |||
700 | CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); | |||
701 | FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); | |||
702 | CalleeProfile.addHeadSamples(Count); | |||
703 | } | |||
704 | } | |||
705 | ||||
706 | static SampleContextFrame | |||
707 | getCallerContext(SampleContextFrames CalleeContext, | |||
708 | SampleContextFrameVector &CallerContext) { | |||
709 | assert(CalleeContext.size() > 1 && "Unexpected empty context")(static_cast <bool> (CalleeContext.size() > 1 && "Unexpected empty context") ? void (0) : __assert_fail ("CalleeContext.size() > 1 && \"Unexpected empty context\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 709, __extension__ __PRETTY_FUNCTION__)); | |||
710 | CalleeContext = CalleeContext.drop_back(); | |||
711 | CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); | |||
712 | SampleContextFrame CallerFrame = CallerContext.back(); | |||
713 | CallerContext.back().Location = LineLocation(0, 0); | |||
714 | return CallerFrame; | |||
715 | } | |||
716 | ||||
717 | void CSProfileGenerator::populateInferredFunctionSamples() { | |||
718 | for (const auto &Item : ProfileMap) { | |||
719 | const auto &CalleeContext = Item.first; | |||
720 | const FunctionSamples &CalleeProfile = Item.second; | |||
721 | ||||
722 | // If we already have head sample counts, we must have value profile | |||
723 | // for call sites added already. Skip to avoid double counting. | |||
724 | if (CalleeProfile.getHeadSamples()) | |||
725 | continue; | |||
726 | // If we don't have context, nothing to do for caller's call site. | |||
727 | // This could happen for entry point function. | |||
728 | if (CalleeContext.isBaseContext()) | |||
729 | continue; | |||
730 | ||||
731 | // Infer Caller's frame loc and context ID through string splitting | |||
732 | SampleContextFrameVector CallerContextId; | |||
733 | SampleContextFrame &&CallerLeafFrameLoc = | |||
734 | getCallerContext(CalleeContext.getContextFrames(), CallerContextId); | |||
735 | SampleContextFrames CallerContext(CallerContextId); | |||
736 | ||||
737 | // It's possible that we haven't seen any sample directly in the caller, | |||
738 | // in which case CallerProfile will not exist. But we can't modify | |||
739 | // ProfileMap while iterating it. | |||
740 | // TODO: created function profile for those callers too | |||
741 | if (ProfileMap.find(CallerContext) == ProfileMap.end()) | |||
742 | continue; | |||
743 | FunctionSamples &CallerProfile = ProfileMap[CallerContext]; | |||
744 | ||||
745 | // Since we don't have call count for inlined functions, we | |||
746 | // estimate it from inlinee's profile using entry body sample. | |||
747 | uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); | |||
748 | // If we don't have samples with location, use 1 to indicate live. | |||
749 | if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) | |||
750 | EstimatedCallCount = 1; | |||
751 | CallerProfile.addCalledTargetSamples( | |||
752 | CallerLeafFrameLoc.Location.LineOffset, | |||
753 | CallerLeafFrameLoc.Location.Discriminator, | |||
754 | CalleeProfile.getContext().getName(), EstimatedCallCount); | |||
755 | CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, | |||
756 | CallerLeafFrameLoc.Location.Discriminator, | |||
757 | EstimatedCallCount); | |||
758 | CallerProfile.addTotalSamples(EstimatedCallCount); | |||
759 | } | |||
760 | } | |||
761 | ||||
762 | void CSProfileGenerator::postProcessProfiles() { | |||
763 | // Compute hot/cold threshold based on profile. This will be used for cold | |||
764 | // context profile merging/trimming. | |||
765 | computeSummaryAndThreshold(); | |||
766 | ||||
767 | // Run global pre-inliner to adjust/merge context profile based on estimated | |||
768 | // inline decisions. | |||
769 | if (EnableCSPreInliner) { | |||
770 | CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) | |||
771 | .run(); | |||
772 | // Turn off the profile merger by default unless it is explicitly enabled. | |||
773 | if (!CSProfMergeColdContext.getNumOccurrences()) | |||
774 | CSProfMergeColdContext = false; | |||
775 | } | |||
776 | ||||
777 | // Trim and merge cold context profile using cold threshold above. | |||
778 | if (TrimColdProfile || CSProfMergeColdContext) { | |||
779 | SampleContextTrimmer(ProfileMap) | |||
780 | .trimAndMergeColdContextProfiles( | |||
781 | HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, | |||
782 | CSProfMaxColdContextDepth, EnableCSPreInliner); | |||
783 | } | |||
784 | ||||
785 | // Merge function samples of CS profile to calculate profile density. | |||
786 | sampleprof::SampleProfileMap ContextLessProfiles; | |||
787 | for (const auto &I : ProfileMap) { | |||
788 | ContextLessProfiles[I.second.getName()].merge(I.second); | |||
789 | } | |||
790 | ||||
791 | calculateAndShowDensity(ContextLessProfiles); | |||
792 | if (GenCSNestedProfile) { | |||
793 | CSProfileConverter CSConverter(ProfileMap); | |||
794 | CSConverter.convertProfiles(); | |||
795 | FunctionSamples::ProfileIsCSFlat = false; | |||
796 | FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; | |||
797 | } | |||
798 | } | |||
799 | ||||
800 | void ProfileGeneratorBase::computeSummaryAndThreshold() { | |||
801 | SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); | |||
802 | auto Summary = Builder.computeSummaryForProfiles(ProfileMap); | |||
803 | HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( | |||
804 | (Summary->getDetailedSummary())); | |||
805 | ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( | |||
806 | (Summary->getDetailedSummary())); | |||
807 | } | |||
808 | ||||
809 | // Helper function to extract context prefix string stack | |||
810 | // Extract context stack for reusing, leaf context stack will | |||
811 | // be added compressed while looking up function profile | |||
812 | static void extractPrefixContextStack( | |||
813 | SampleContextFrameVector &ContextStack, | |||
814 | const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes, | |||
815 | ProfiledBinary *Binary) { | |||
816 | for (const auto *P : Probes) { | |||
817 | Binary->getInlineContextForProbe(P, ContextStack, true); | |||
818 | } | |||
819 | } | |||
820 | ||||
821 | void CSProfileGenerator::generateProbeBasedProfile() { | |||
822 | for (const auto &CI : SampleCounters) { | |||
823 | const ProbeBasedCtxKey *CtxKey = | |||
824 | dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr()); | |||
825 | SampleContextFrameVector ContextStack; | |||
826 | extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); | |||
| ||||
827 | // Fill in function body samples from probes, also infer caller's samples | |||
828 | // from callee's probe | |||
829 | populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); | |||
830 | // Fill in boundary samples for a call probe | |||
831 | populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); | |||
832 | } | |||
833 | } | |||
834 | ||||
835 | void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, | |||
836 | ProbeCounterMap &ProbeCounter) { | |||
837 | RangeSample Ranges; | |||
838 | findDisjointRanges(Ranges, RangeCounter); | |||
839 | for (const auto &Range : Ranges) { | |||
840 | uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); | |||
841 | uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); | |||
842 | uint64_t Count = Range.second; | |||
843 | // Disjoint ranges have introduce zero-filled gap that | |||
844 | // doesn't belong to current context, filter them out. | |||
845 | if (Count == 0) | |||
846 | continue; | |||
847 | ||||
848 | InstructionPointer IP(Binary, RangeBegin, true); | |||
849 | // Disjoint ranges may have range in the middle of two instr, | |||
850 | // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range | |||
851 | // can be Addr1+1 to Addr2-1. We should ignore such range. | |||
852 | if (IP.Address > RangeEnd) | |||
853 | continue; | |||
854 | ||||
855 | do { | |||
856 | const AddressProbesMap &Address2ProbesMap = | |||
857 | Binary->getAddress2ProbesMap(); | |||
858 | auto It = Address2ProbesMap.find(IP.Address); | |||
859 | if (It != Address2ProbesMap.end()) { | |||
860 | for (const auto &Probe : It->second) { | |||
861 | if (!Probe.isBlock()) | |||
862 | continue; | |||
863 | ProbeCounter[&Probe] += Count; | |||
864 | } | |||
865 | } | |||
866 | } while (IP.advance() && IP.Address <= RangeEnd); | |||
867 | } | |||
868 | } | |||
869 | ||||
870 | void CSProfileGenerator::populateBodySamplesWithProbes( | |||
871 | const RangeSample &RangeCounter, SampleContextFrames ContextStack) { | |||
872 | ProbeCounterMap ProbeCounter; | |||
873 | // Extract the top frame probes by looking up each address among the range in | |||
874 | // the Address2ProbeMap | |||
875 | extractProbesFromRange(RangeCounter, ProbeCounter); | |||
876 | std::unordered_map<MCDecodedPseudoProbeInlineTree *, | |||
877 | std::unordered_set<FunctionSamples *>> | |||
878 | FrameSamples; | |||
879 | for (const auto &PI : ProbeCounter) { | |||
880 | const MCDecodedPseudoProbe *Probe = PI.first; | |||
881 | uint64_t Count = PI.second; | |||
882 | FunctionSamples &FunctionProfile = | |||
883 | getFunctionProfileForLeafProbe(ContextStack, Probe); | |||
884 | // Record the current frame and FunctionProfile whenever samples are | |||
885 | // collected for non-danglie probes. This is for reporting all of the | |||
886 | // zero count probes of the frame later. | |||
887 | FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); | |||
888 | FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); | |||
889 | FunctionProfile.addTotalSamples(Count); | |||
890 | if (Probe->isEntry()) { | |||
891 | FunctionProfile.addHeadSamples(Count); | |||
892 | // Look up for the caller's function profile | |||
893 | const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); | |||
894 | SampleContextFrames CalleeContextId = | |||
895 | FunctionProfile.getContext().getContextFrames(); | |||
896 | if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { | |||
897 | // Since the context id will be compressed, we have to use callee's | |||
898 | // context id to infer caller's context id to ensure they share the | |||
899 | // same context prefix. | |||
900 | SampleContextFrameVector CallerContextId; | |||
901 | SampleContextFrame &&CallerLeafFrameLoc = | |||
902 | getCallerContext(CalleeContextId, CallerContextId); | |||
903 | uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; | |||
904 | assert(CallerIndex &&(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!" ) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 905, __extension__ __PRETTY_FUNCTION__)) | |||
905 | "Inferred caller's location index shouldn't be zero!")(static_cast <bool> (CallerIndex && "Inferred caller's location index shouldn't be zero!" ) ? void (0) : __assert_fail ("CallerIndex && \"Inferred caller's location index shouldn't be zero!\"" , "llvm/tools/llvm-profgen/ProfileGenerator.cpp", 905, __extension__ __PRETTY_FUNCTION__)); | |||
906 | FunctionSamples &CallerProfile = | |||
907 | getFunctionProfileForContext(CallerContextId); | |||
908 | CallerProfile.setFunctionHash(InlinerDesc->FuncHash); | |||
909 | CallerProfile.addBodySamples(CallerIndex, 0, Count); | |||
910 | CallerProfile.addTotalSamples(Count); | |||
911 | CallerProfile.addCalledTargetSamples( | |||
912 | CallerIndex, 0, FunctionProfile.getContext().getName(), Count); | |||
913 | } | |||
914 | } | |||
915 | } | |||
916 | ||||
917 | // Assign zero count for remaining probes without sample hits to | |||
918 | // differentiate from probes optimized away, of which the counts are unknown | |||
919 | // and will be inferred by the compiler. | |||
920 | for (auto &I : FrameSamples) { | |||
921 | for (auto *FunctionProfile : I.second) { | |||
922 | for (auto *Probe : I.first->getProbes()) { | |||
923 | FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); | |||
924 | } | |||
925 | } | |||
926 | } | |||
927 | } | |||
928 | ||||
929 | void CSProfileGenerator::populateBoundarySamplesWithProbes( | |||
930 | const BranchSample &BranchCounter, SampleContextFrames ContextStack) { | |||
931 | for (const auto &BI : BranchCounter) { | |||
932 | uint64_t SourceOffset = BI.first.first; | |||
933 | uint64_t TargetOffset = BI.first.second; | |||
934 | uint64_t Count = BI.second; | |||
935 | uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); | |||
936 | const MCDecodedPseudoProbe *CallProbe = | |||
937 | Binary->getCallProbeForAddr(SourceAddress); | |||
938 | if (CallProbe == nullptr) | |||
939 | continue; | |||
940 | FunctionSamples &FunctionProfile = | |||
941 | getFunctionProfileForLeafProbe(ContextStack, CallProbe); | |||
942 | FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); | |||
943 | FunctionProfile.addTotalSamples(Count); | |||
944 | StringRef CalleeName = getCalleeNameForOffset(TargetOffset); | |||
945 | if (CalleeName.size() == 0) | |||
946 | continue; | |||
947 | FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, | |||
948 | Count); | |||
949 | } | |||
950 | } | |||
951 | ||||
952 | FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( | |||
953 | SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { | |||
954 | ||||
955 | // Explicitly copy the context for appending the leaf context | |||
956 | SampleContextFrameVector NewContextStack(ContextStack.begin(), | |||
957 | ContextStack.end()); | |||
958 | Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); | |||
959 | // For leaf inlined context with the top frame, we should strip off the top | |||
960 | // frame's probe id, like: | |||
961 | // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" | |||
962 | auto LeafFrame = NewContextStack.back(); | |||
963 | LeafFrame.Location = LineLocation(0, 0); | |||
964 | NewContextStack.pop_back(); | |||
965 | // Compress the context string except for the leaf frame | |||
966 | CSProfileGenerator::compressRecursionContext(NewContextStack); | |||
967 | CSProfileGenerator::trimContext(NewContextStack); | |||
968 | NewContextStack.push_back(LeafFrame); | |||
969 | ||||
970 | const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); | |||
971 | bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); | |||
972 | FunctionSamples &FunctionProile = | |||
973 | getFunctionProfileForContext(NewContextStack, WasLeafInlined); | |||
974 | FunctionProile.setFunctionHash(FuncDesc->FuncHash); | |||
975 | return FunctionProile; | |||
976 | } | |||
977 | ||||
978 | } // end namespace sampleprof | |||
979 | } // end namespace llvm |