LLVM  12.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that optimizes memory intrinsics
10 // such as memcpy using the size value profile. When memory intrinsic size
11 // value profile metadata is available, a single memory intrinsic is expanded
12 // to a sequence of guarded specialized versions that are called with the
13 // hottest size(s), for later expansion into more optimal inline sequences.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/LLVMContext.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/Pass.h"
40 #include "llvm/PassRegistry.h"
42 #define INSTR_PROF_VALUE_PROF_MEMOP_API
44 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
52 #include <cassert>
53 #include <cstdint>
54 #include <vector>
55 
56 using namespace llvm;
57 
58 #define DEBUG_TYPE "pgo-memop-opt"
59 
60 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
61 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
62 
63 // The minimum call count to optimize memory intrinsic calls.
64 static cl::opt<unsigned>
65  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
66  cl::init(1000),
67  cl::desc("The minimum count to optimize memory "
68  "intrinsic calls"));
69 
70 // Command line option to disable memory intrinsic optimization. The default is
71 // false. This is for debug purpose.
72 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
73  cl::Hidden, cl::desc("Disable optimize"));
74 
75 // The percent threshold to optimize memory intrinsic calls.
76 static cl::opt<unsigned>
77  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
79  cl::desc("The percentage threshold for the "
80  "memory intrinsic calls optimization"));
81 
82 // Maximum number of versions for optimizing memory intrinsic call.
83 static cl::opt<unsigned>
84  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
86  cl::desc("The max version for the optimized memory "
87  " intrinsic calls"));
88 
89 // Scale the counts from the annotation using the BB count value.
90 static cl::opt<bool>
91  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
92  cl::desc("Scale the memop size counts using the basic "
93  " block count value"));
94 
96  MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
97  cl::Hidden,
98  cl::desc("Size-specialize memcmp and bcmp calls"));
99 
100 static cl::opt<unsigned>
101  MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
102  cl::desc("Optimize the memop size <= this value"));
103 
104 namespace {
105 class PGOMemOPSizeOptLegacyPass : public FunctionPass {
106 public:
107  static char ID;
108 
109  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
111  }
112 
113  StringRef getPassName() const override { return "PGOMemOPSize"; }
114 
115 private:
116  bool runOnFunction(Function &F) override;
117  void getAnalysisUsage(AnalysisUsage &AU) const override {
123  }
124 };
125 } // end anonymous namespace
126 
128 INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
129  "Optimize memory intrinsic using its size value profile",
130  false, false)
133 INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
134  "Optimize memory intrinsic using its size value profile",
135  false, false)
136 
138  return new PGOMemOPSizeOptLegacyPass();
139 }
140 
141 namespace {
142 
143 static const char *getMIName(const MemIntrinsic *MI) {
144  switch (MI->getIntrinsicID()) {
145  case Intrinsic::memcpy:
146  return "memcpy";
147  case Intrinsic::memmove:
148  return "memmove";
149  case Intrinsic::memset:
150  return "memset";
151  default:
152  return "unknown";
153  }
154 }
155 
156 // A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
157 struct MemOp {
158  Instruction *I;
159  MemOp(MemIntrinsic *MI) : I(MI) {}
160  MemOp(CallInst *CI) : I(CI) {}
161  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
162  CallInst *asCI() { return cast<CallInst>(I); }
163  MemOp clone() {
164  if (auto MI = asMI())
165  return MemOp(cast<MemIntrinsic>(MI->clone()));
166  return MemOp(cast<CallInst>(asCI()->clone()));
167  }
168  Value *getLength() {
169  if (auto MI = asMI())
170  return MI->getLength();
171  return asCI()->getArgOperand(2);
172  }
173  void setLength(Value *Length) {
174  if (auto MI = asMI())
175  return MI->setLength(Length);
176  asCI()->setArgOperand(2, Length);
177  }
178  StringRef getFuncName() {
179  if (auto MI = asMI())
180  return MI->getCalledFunction()->getName();
181  return asCI()->getCalledFunction()->getName();
182  }
183  bool isMemmove() {
184  if (auto MI = asMI())
185  if (MI->getIntrinsicID() == Intrinsic::memmove)
186  return true;
187  return false;
188  }
189  bool isMemcmp(TargetLibraryInfo &TLI) {
190  LibFunc Func;
191  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
192  Func == LibFunc_memcmp) {
193  return true;
194  }
195  return false;
196  }
197  bool isBcmp(TargetLibraryInfo &TLI) {
198  LibFunc Func;
199  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
200  Func == LibFunc_bcmp) {
201  return true;
202  }
203  return false;
204  }
205  const char *getName(TargetLibraryInfo &TLI) {
206  if (auto MI = asMI())
207  return getMIName(MI);
208  LibFunc Func;
209  if (TLI.getLibFunc(*asCI(), Func)) {
210  if (Func == LibFunc_memcmp)
211  return "memcmp";
212  if (Func == LibFunc_bcmp)
213  return "bcmp";
214  }
215  llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
216  return nullptr;
217  }
218 };
219 
220 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
221 public:
222  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
224  TargetLibraryInfo &TLI)
225  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
226  ValueDataArray =
227  std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
228  }
229  bool isChanged() const { return Changed; }
230  void perform() {
231  WorkList.clear();
232  visit(Func);
233 
234  for (auto &MO : WorkList) {
235  ++NumOfPGOMemOPAnnotate;
236  if (perform(MO)) {
237  Changed = true;
238  ++NumOfPGOMemOPOpt;
239  LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
240  << "is Transformed.\n");
241  }
242  }
243  }
244 
245  void visitMemIntrinsic(MemIntrinsic &MI) {
246  Value *Length = MI.getLength();
247  // Not perform on constant length calls.
248  if (dyn_cast<ConstantInt>(Length))
249  return;
250  WorkList.push_back(MemOp(&MI));
251  }
252 
253  void visitCallInst(CallInst &CI) {
254  LibFunc Func;
255  if (TLI.getLibFunc(CI, Func) &&
256  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
257  !isa<ConstantInt>(CI.getArgOperand(2))) {
258  WorkList.push_back(MemOp(&CI));
259  }
260  }
261 
262 private:
263  Function &Func;
266  DominatorTree *DT;
267  TargetLibraryInfo &TLI;
268  bool Changed;
269  std::vector<MemOp> WorkList;
270  // The space to read the profile annotation.
271  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
272  bool perform(MemOp MO);
273 };
274 
275 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
276  assert(Count <= TotalCount);
277  if (Count < MemOPCountThreshold)
278  return false;
279  if (Count < TotalCount * MemOPPercentThreshold / 100)
280  return false;
281  return true;
282 }
283 
284 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
285  uint64_t Denom) {
286  if (!MemOPScaleCount)
287  return Count;
288  bool Overflowed;
289  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
290  return ScaleCount / Denom;
291 }
292 
293 bool MemOPSizeOpt::perform(MemOp MO) {
294  assert(MO.I);
295  if (MO.isMemmove())
296  return false;
297  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
298  return false;
299 
301  uint64_t TotalCount;
302  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions,
303  ValueDataArray.get(), NumVals, TotalCount))
304  return false;
305 
306  uint64_t ActualCount = TotalCount;
307  uint64_t SavedTotalCount = TotalCount;
308  if (MemOPScaleCount) {
309  auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
310  if (!BBEdgeCount)
311  return false;
312  ActualCount = *BBEdgeCount;
313  }
314 
315  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
316  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
317  << ActualCount << "\n");
318  LLVM_DEBUG(
319  for (auto &VD
320  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
321 
322  if (ActualCount < MemOPCountThreshold)
323  return false;
324  // Skip if the total value profiled count is 0, in which case we can't
325  // scale up the counts properly (and there is no profitable transformation).
326  if (TotalCount == 0)
327  return false;
328 
329  TotalCount = ActualCount;
330  if (MemOPScaleCount)
331  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
332  << " denominator = " << SavedTotalCount << "\n");
333 
334  // Keeping track of the count of the default case:
335  uint64_t RemainCount = TotalCount;
336  uint64_t SavedRemainCount = SavedTotalCount;
338  SmallVector<uint64_t, 16> CaseCounts;
339  uint64_t MaxCount = 0;
340  unsigned Version = 0;
341  // Default case is in the front -- save the slot here.
342  CaseCounts.push_back(0);
343  for (auto &VD : VDs) {
344  int64_t V = VD.Value;
345  uint64_t C = VD.Count;
346  if (MemOPScaleCount)
347  C = getScaledCount(C, ActualCount, SavedTotalCount);
348 
349  if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize)
350  continue;
351 
352  // ValueCounts are sorted on the count. Break at the first un-profitable
353  // value.
354  if (!isProfitable(C, RemainCount))
355  break;
356 
357  SizeIds.push_back(V);
358  CaseCounts.push_back(C);
359  if (C > MaxCount)
360  MaxCount = C;
361 
362  assert(RemainCount >= C);
363  RemainCount -= C;
364  assert(SavedRemainCount >= VD.Count);
365  SavedRemainCount -= VD.Count;
366 
367  if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
368  break;
369  }
370 
371  if (Version == 0)
372  return false;
373 
374  CaseCounts[0] = RemainCount;
375  if (RemainCount > MaxCount)
376  MaxCount = RemainCount;
377 
378  uint64_t SumForOpt = TotalCount - RemainCount;
379 
380  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
381  << " Versions (covering " << SumForOpt << " out of "
382  << TotalCount << ")\n");
383 
384  // mem_op(..., size)
385  // ==>
386  // switch (size) {
387  // case s1:
388  // mem_op(..., s1);
389  // goto merge_bb;
390  // case s2:
391  // mem_op(..., s2);
392  // goto merge_bb;
393  // ...
394  // default:
395  // mem_op(..., size);
396  // goto merge_bb;
397  // }
398  // merge_bb:
399 
400  BasicBlock *BB = MO.I->getParent();
401  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
402  LLVM_DEBUG(dbgs() << *BB << "\n");
403  auto OrigBBFreq = BFI.getBlockFreq(BB);
404 
405  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
406  BasicBlock::iterator It(*MO.I);
407  ++It;
408  assert(It != DefaultBB->end());
409  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
410  MergeBB->setName("MemOP.Merge");
411  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
412  DefaultBB->setName("MemOP.Default");
413 
415  auto &Ctx = Func.getContext();
416  IRBuilder<> IRB(BB);
418  Value *SizeVar = MO.getLength();
419  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
420  Type *MemOpTy = MO.I->getType();
421  PHINode *PHI = nullptr;
422  if (!MemOpTy->isVoidTy()) {
423  // Insert a phi for the return values at the merge block.
424  IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
425  PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
426  MO.I->replaceAllUsesWith(PHI);
427  PHI->addIncoming(MO.I, DefaultBB);
428  }
429 
430  // Clear the value profile data.
431  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
432  // If all promoted, we don't need the MD.prof metadata.
433  if (SavedRemainCount > 0 || Version != NumVals)
434  // Otherwise we need update with the un-promoted records back.
435  annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version),
436  SavedRemainCount, IPVK_MemOPSize, NumVals);
437 
438  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
439 
440  std::vector<DominatorTree::UpdateType> Updates;
441  if (DT)
442  Updates.reserve(2 * SizeIds.size());
443 
444  for (uint64_t SizeId : SizeIds) {
445  BasicBlock *CaseBB = BasicBlock::Create(
446  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
447  MemOp NewMO = MO.clone();
448  // Fix the argument.
449  auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
450  assert(SizeType && "Expected integer type size argument.");
451  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
452  NewMO.setLength(CaseSizeId);
453  CaseBB->getInstList().push_back(NewMO.I);
454  IRBuilder<> IRBCase(CaseBB);
455  IRBCase.CreateBr(MergeBB);
456  SI->addCase(CaseSizeId, CaseBB);
457  if (!MemOpTy->isVoidTy())
458  PHI->addIncoming(NewMO.I, CaseBB);
459  if (DT) {
460  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
461  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
462  }
463  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
464  }
465  DTU.applyUpdates(Updates);
466  Updates.clear();
467 
468  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
469 
470  LLVM_DEBUG(dbgs() << *BB << "\n");
471  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
472  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
473 
474  ORE.emit([&]() {
475  using namespace ore;
476  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
477  << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
478  << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
479  << " for " << NV("Versions", Version) << " versions";
480  });
481 
482  return true;
483 }
484 } // namespace
485 
488  DominatorTree *DT, TargetLibraryInfo &TLI) {
489  if (DisableMemOPOPT)
490  return false;
491 
492  if (F.hasFnAttribute(Attribute::OptimizeForSize))
493  return false;
494  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
495  MemOPSizeOpt.perform();
496  return MemOPSizeOpt.isChanged();
497 }
498 
501  getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
502  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
503  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
504  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
505  TargetLibraryInfo &TLI =
506  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
507  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
508 }
509 
510 namespace llvm {
512 
515  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
517  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
518  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
519  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
520  if (!Changed)
521  return PreservedAnalyses::all();
522  auto PA = PreservedAnalyses();
523  PA.preserve<GlobalsAA>();
524  PA.preserve<DominatorTreeAnalysis>();
525  return PA;
526 }
527 } // namespace llvm
Legacy wrapper pass to provide the GlobalsAAResult object.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Base class for instruction visitors.
Definition: InstVisitor.h:79
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
FunctionPass * createPGOMemOPSizeOptLegacyPass()
#define DEBUG_TYPE
void push_back(const T &Elt)
Definition: SmallVector.h:379
This class represents a function call, abstracting a target machine's calling convention.
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:821
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:249
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1323
AnalysisUsage & addRequired()
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
static StringRef getName(Value *V)
Legacy analysis pass which computes BlockFrequencyInfo.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:367
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
pgo memop opt
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:981
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:936
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
iterator end()
Definition: BasicBlock.h:298
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:867
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
void push_back(pointer val)
Definition: ilist.h:313
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
pgo instr Read PGO instrumentation profile
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite"))
char & PGOMemOPSizeOptID
#define I(x, y, z)
Definition: MD5.cpp:59
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:804
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
size_t size() const
Definition: SmallVector.h:72
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry &)
Analysis pass providing the TargetLibraryInfo.
Multiway switch.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1479
LLVM Value Representation.
Definition: Value.h:75
OptimizationRemarkEmitter legacy analysis pass.
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:278
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const uint64_t Version
Definition: InstrProf.h:989
The optimization diagnostic interface.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)