LLVM 19.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that optimizes memory intrinsics
10// such as memcpy using the size value profile. When memory intrinsic size
11// value profile metadata is available, a single memory intrinsic is expanded
12// to a sequence of guarded specialized versions that are called with the
13// hottest size(s), for later expansion into more optimal inline sequences.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
25#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/LLVMContext.h"
34#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
37#define INSTR_PROF_VALUE_PROF_MEMOP_API
41#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <cstdint>
48#include <vector>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "pgo-memop-opt"
53
54STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
55STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
56
57// The minimum call count to optimize memory intrinsic calls.
59 MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000),
60 cl::desc("The minimum count to optimize memory "
61 "intrinsic calls"));
62
63// Command line option to disable memory intrinsic optimization. The default is
64// false. This is for debug purpose.
65static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
66 cl::Hidden, cl::desc("Disable optimize"));
67
68// The percent threshold to optimize memory intrinsic calls.
70 MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
72 cl::desc("The percentage threshold for the "
73 "memory intrinsic calls optimization"));
74
75// Maximum number of versions for optimizing memory intrinsic call.
77 MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
78 cl::desc("The max version for the optimized memory "
79 " intrinsic calls"));
80
81// Scale the counts from the annotation using the BB count value.
82static cl::opt<bool>
83 MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
84 cl::desc("Scale the memop size counts using the basic "
85 " block count value"));
86
88 MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
90 cl::desc("Size-specialize memcmp and bcmp calls"));
91
93 MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
94 cl::desc("Optimize the memop size <= this value"));
95
96namespace {
97
98static const char *getMIName(const MemIntrinsic *MI) {
99 switch (MI->getIntrinsicID()) {
100 case Intrinsic::memcpy:
101 return "memcpy";
102 case Intrinsic::memmove:
103 return "memmove";
104 case Intrinsic::memset:
105 return "memset";
106 default:
107 return "unknown";
108 }
109}
110
111// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
112struct MemOp {
113 Instruction *I;
114 MemOp(MemIntrinsic *MI) : I(MI) {}
115 MemOp(CallInst *CI) : I(CI) {}
116 MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
117 CallInst *asCI() { return cast<CallInst>(I); }
118 MemOp clone() {
119 if (auto MI = asMI())
120 return MemOp(cast<MemIntrinsic>(MI->clone()));
121 return MemOp(cast<CallInst>(asCI()->clone()));
122 }
123 Value *getLength() {
124 if (auto MI = asMI())
125 return MI->getLength();
126 return asCI()->getArgOperand(2);
127 }
128 void setLength(Value *Length) {
129 if (auto MI = asMI())
130 return MI->setLength(Length);
131 asCI()->setArgOperand(2, Length);
132 }
133 StringRef getFuncName() {
134 if (auto MI = asMI())
135 return MI->getCalledFunction()->getName();
136 return asCI()->getCalledFunction()->getName();
137 }
138 bool isMemmove() {
139 if (auto MI = asMI())
140 if (MI->getIntrinsicID() == Intrinsic::memmove)
141 return true;
142 return false;
143 }
144 bool isMemcmp(TargetLibraryInfo &TLI) {
146 if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
147 Func == LibFunc_memcmp) {
148 return true;
149 }
150 return false;
151 }
152 bool isBcmp(TargetLibraryInfo &TLI) {
154 if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
155 Func == LibFunc_bcmp) {
156 return true;
157 }
158 return false;
159 }
160 const char *getName(TargetLibraryInfo &TLI) {
161 if (auto MI = asMI())
162 return getMIName(MI);
164 if (TLI.getLibFunc(*asCI(), Func)) {
165 if (Func == LibFunc_memcmp)
166 return "memcmp";
167 if (Func == LibFunc_bcmp)
168 return "bcmp";
169 }
170 llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
171 return nullptr;
172 }
173};
174
175class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
176public:
177 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
180 : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {}
181 bool isChanged() const { return Changed; }
182 void perform() {
183 WorkList.clear();
184 visit(Func);
185
186 for (auto &MO : WorkList) {
187 ++NumOfPGOMemOPAnnotate;
188 if (perform(MO)) {
189 Changed = true;
190 ++NumOfPGOMemOPOpt;
191 LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
192 << "is Transformed.\n");
193 }
194 }
195 }
196
198 Value *Length = MI.getLength();
199 // Not perform on constant length calls.
200 if (isa<ConstantInt>(Length))
201 return;
202 WorkList.push_back(MemOp(&MI));
203 }
204
205 void visitCallInst(CallInst &CI) {
207 if (TLI.getLibFunc(CI, Func) &&
208 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
209 !isa<ConstantInt>(CI.getArgOperand(2))) {
210 WorkList.push_back(MemOp(&CI));
211 }
212 }
213
214private:
215 Function &Func;
218 DominatorTree *DT;
220 bool Changed;
221 std::vector<MemOp> WorkList;
222 bool perform(MemOp MO);
223};
224
225static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
226 assert(Count <= TotalCount);
227 if (Count < MemOPCountThreshold)
228 return false;
229 if (Count < TotalCount * MemOPPercentThreshold / 100)
230 return false;
231 return true;
232}
233
234static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
235 uint64_t Denom) {
236 if (!MemOPScaleCount)
237 return Count;
238 bool Overflowed;
239 uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
240 return ScaleCount / Denom;
241}
242
243bool MemOPSizeOpt::perform(MemOp MO) {
244 assert(MO.I);
245 if (MO.isMemmove())
246 return false;
247 if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
248 return false;
249
250 uint32_t NumVals = INSTR_PROF_NUM_BUCKETS;
251 uint32_t MaxNumVals = INSTR_PROF_NUM_BUCKETS;
252 uint64_t TotalCount;
253 auto ValueDataArray = getValueProfDataFromInst(
254 *MO.I, IPVK_MemOPSize, MaxNumVals, NumVals, TotalCount);
255 if (!ValueDataArray)
256 return false;
257
258 uint64_t ActualCount = TotalCount;
259 uint64_t SavedTotalCount = TotalCount;
260 if (MemOPScaleCount) {
261 auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
262 if (!BBEdgeCount)
263 return false;
264 ActualCount = *BBEdgeCount;
265 }
266
267 ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
268 LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
269 << ActualCount << "\n");
271 for (auto &VD
272 : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
273
274 if (ActualCount < MemOPCountThreshold)
275 return false;
276 // Skip if the total value profiled count is 0, in which case we can't
277 // scale up the counts properly (and there is no profitable transformation).
278 if (TotalCount == 0)
279 return false;
280
281 TotalCount = ActualCount;
282 if (MemOPScaleCount)
283 LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
284 << " denominator = " << SavedTotalCount << "\n");
285
286 // Keeping track of the count of the default case:
287 uint64_t RemainCount = TotalCount;
288 uint64_t SavedRemainCount = SavedTotalCount;
290 SmallVector<uint64_t, 16> CaseCounts;
292 uint64_t MaxCount = 0;
293 unsigned Version = 0;
294 // Default case is in the front -- save the slot here.
295 CaseCounts.push_back(0);
297 for (auto I = VDs.begin(), E = VDs.end(); I != E; ++I) {
298 auto &VD = *I;
299 int64_t V = VD.Value;
300 uint64_t C = VD.Count;
301 if (MemOPScaleCount)
302 C = getScaledCount(C, ActualCount, SavedTotalCount);
303
304 if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) {
305 RemainingVDs.push_back(VD);
306 continue;
307 }
308
309 // ValueCounts are sorted on the count. Break at the first un-profitable
310 // value.
311 if (!isProfitable(C, RemainCount)) {
312 RemainingVDs.insert(RemainingVDs.end(), I, E);
313 break;
314 }
315
316 if (!SeenSizeId.insert(V).second) {
317 errs() << "warning: Invalid Profile Data in Function " << Func.getName()
318 << ": Two identical values in MemOp value counts.\n";
319 return false;
320 }
321
322 SizeIds.push_back(V);
323 CaseCounts.push_back(C);
324 if (C > MaxCount)
325 MaxCount = C;
326
327 assert(RemainCount >= C);
328 RemainCount -= C;
329 assert(SavedRemainCount >= VD.Count);
330 SavedRemainCount -= VD.Count;
331
332 if (++Version >= MemOPMaxVersion && MemOPMaxVersion != 0) {
333 RemainingVDs.insert(RemainingVDs.end(), I + 1, E);
334 break;
335 }
336 }
337
338 if (Version == 0)
339 return false;
340
341 CaseCounts[0] = RemainCount;
342 if (RemainCount > MaxCount)
343 MaxCount = RemainCount;
344
345 uint64_t SumForOpt = TotalCount - RemainCount;
346
347 LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
348 << " Versions (covering " << SumForOpt << " out of "
349 << TotalCount << ")\n");
350
351 // mem_op(..., size)
352 // ==>
353 // switch (size) {
354 // case s1:
355 // mem_op(..., s1);
356 // goto merge_bb;
357 // case s2:
358 // mem_op(..., s2);
359 // goto merge_bb;
360 // ...
361 // default:
362 // mem_op(..., size);
363 // goto merge_bb;
364 // }
365 // merge_bb:
366
367 BasicBlock *BB = MO.I->getParent();
368 LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
369 LLVM_DEBUG(dbgs() << *BB << "\n");
370 auto OrigBBFreq = BFI.getBlockFreq(BB);
371
372 BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
373 BasicBlock::iterator It(*MO.I);
374 ++It;
375 assert(It != DefaultBB->end());
376 BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
377 MergeBB->setName("MemOP.Merge");
378 BFI.setBlockFreq(MergeBB, OrigBBFreq);
379 DefaultBB->setName("MemOP.Default");
380
381 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
382 auto &Ctx = Func.getContext();
383 IRBuilder<> IRB(BB);
385 Value *SizeVar = MO.getLength();
386 SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
387 Type *MemOpTy = MO.I->getType();
388 PHINode *PHI = nullptr;
389 if (!MemOpTy->isVoidTy()) {
390 // Insert a phi for the return values at the merge block.
391 IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
392 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
393 MO.I->replaceAllUsesWith(PHI);
394 PHI->addIncoming(MO.I, DefaultBB);
395 }
396
397 // Clear the value profile data.
398 MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
399 // If all promoted, we don't need the MD.prof metadata.
400 if (SavedRemainCount > 0 || Version != NumVals) {
401 // Otherwise we need update with the un-promoted records back.
402 annotateValueSite(*Func.getParent(), *MO.I, RemainingVDs, SavedRemainCount,
403 IPVK_MemOPSize, NumVals);
404 }
405
406 LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
407
408 std::vector<DominatorTree::UpdateType> Updates;
409 if (DT)
410 Updates.reserve(2 * SizeIds.size());
411
412 for (uint64_t SizeId : SizeIds) {
414 Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
415 MemOp NewMO = MO.clone();
416 // Fix the argument.
417 auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
418 assert(SizeType && "Expected integer type size argument.");
419 ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
420 NewMO.setLength(CaseSizeId);
421 NewMO.I->insertInto(CaseBB, CaseBB->end());
422 IRBuilder<> IRBCase(CaseBB);
423 IRBCase.CreateBr(MergeBB);
424 SI->addCase(CaseSizeId, CaseBB);
425 if (!MemOpTy->isVoidTy())
426 PHI->addIncoming(NewMO.I, CaseBB);
427 if (DT) {
428 Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
429 Updates.push_back({DominatorTree::Insert, BB, CaseBB});
430 }
431 LLVM_DEBUG(dbgs() << *CaseBB << "\n");
432 }
433 DTU.applyUpdates(Updates);
434 Updates.clear();
435
436 if (MaxCount)
437 setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
438
439 LLVM_DEBUG(dbgs() << *BB << "\n");
440 LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
441 LLVM_DEBUG(dbgs() << *MergeBB << "\n");
442
443 ORE.emit([&]() {
444 using namespace ore;
445 return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
446 << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
447 << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
448 << " for " << NV("Versions", Version) << " versions";
449 });
450
451 return true;
452}
453} // namespace
454
458 if (DisableMemOPOPT)
459 return false;
460
461 if (F.hasFnAttribute(Attribute::OptimizeForSize))
462 return false;
463 MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
464 MemOPSizeOpt.perform();
465 return MemOPSizeOpt.isChanged();
466}
467
473 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
474 bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
475 if (!Changed)
476 return PreservedAnalyses::all();
477 auto PA = PreservedAnalyses();
478 PA.preserve<DominatorTreeAnalysis>();
479 return PA;
480}
Rewrite undef for PHI
#define LLVM_DEBUG(X)
Definition: Debug.h:101
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::desc("The max version for the optimized memory " " intrinsic calls"))
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
#define DEBUG_TYPE
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:365
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:202
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitMemIntrinsic(MemIntrinsic &I)
Definition: InstVisitor.h:215
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitCallInst(CallInst &I)
Definition: InstVisitor.h:220
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
This is the common base class for memset/memcpy/memmove.
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
size_t size() const
Definition: SmallVector.h:91
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
LLVM Value Representation.
Definition: Value.h:74
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
const uint64_t Version
Definition: InstrProf.h:1108
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
std::unique_ptr< InstrProfValueData[]> getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1372
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1273
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:566
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)