LLVM  8.0.0svn
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the transformation that optimizes memory intrinsics
11 // such as memcpy using the size value profile. When memory intrinsic size
12 // value profile metadata is available, a single memory intrinsic is expanded
13 // to a sequence of guarded specialized versions that are called with the
14 // hottest size(s), for later expansion into more optimal inline sequences.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Twine.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/DomTreeUpdater.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/InstVisitor.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/PassManager.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/PassRegistry.h"
41 #include "llvm/PassSupport.h"
43 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
51 #include <cassert>
52 #include <cstdint>
53 #include <vector>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "pgo-memop-opt"
58 
59 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
60 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
61 
62 // The minimum call count to optimize memory intrinsic calls.
63 static cl::opt<unsigned>
64  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
65  cl::init(1000),
66  cl::desc("The minimum count to optimize memory "
67  "intrinsic calls"));
68 
69 // Command line option to disable memory intrinsic optimization. The default is
70 // false. This is for debug purpose.
71 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
72  cl::Hidden, cl::desc("Disable optimize"));
73 
74 // The percent threshold to optimize memory intrinsic calls.
75 static cl::opt<unsigned>
76  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
78  cl::desc("The percentage threshold for the "
79  "memory intrinsic calls optimization"));
80 
81 // Maximum number of versions for optimizing memory intrinsic call.
82 static cl::opt<unsigned>
83  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
85  cl::desc("The max version for the optimized memory "
86  " intrinsic calls"));
87 
88 // Scale the counts from the annotation using the BB count value.
89 static cl::opt<bool>
90  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
91  cl::desc("Scale the memop size counts using the basic "
92  " block count value"));
93 
94 // This option sets the rangge of precise profile memop sizes.
96 
97 // This option sets the value that groups large memop sizes
99 
100 namespace {
101 class PGOMemOPSizeOptLegacyPass : public FunctionPass {
102 public:
103  static char ID;
104 
105  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
107  }
108 
109  StringRef getPassName() const override { return "PGOMemOPSize"; }
110 
111 private:
112  bool runOnFunction(Function &F) override;
113  void getAnalysisUsage(AnalysisUsage &AU) const override {
118  }
119 };
120 } // end anonymous namespace
121 
123 INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
124  "Optimize memory intrinsic using its size value profile",
125  false, false)
127 INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
128  "Optimize memory intrinsic using its size value profile",
129  false, false)
130 
132  return new PGOMemOPSizeOptLegacyPass();
133 }
134 
135 namespace {
136 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
137 public:
138  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
140  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
141  ValueDataArray =
142  llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
143  // Get the MemOPSize range information from option MemOPSizeRange,
144  getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
145  PreciseRangeLast);
146  }
147  bool isChanged() const { return Changed; }
148  void perform() {
149  WorkList.clear();
150  visit(Func);
151 
152  for (auto &MI : WorkList) {
153  ++NumOfPGOMemOPAnnotate;
154  if (perform(MI)) {
155  Changed = true;
156  ++NumOfPGOMemOPOpt;
157  LLVM_DEBUG(dbgs() << "MemOP call: "
158  << MI->getCalledFunction()->getName()
159  << "is Transformed.\n");
160  }
161  }
162  }
163 
164  void visitMemIntrinsic(MemIntrinsic &MI) {
165  Value *Length = MI.getLength();
166  // Not perform on constant length calls.
167  if (dyn_cast<ConstantInt>(Length))
168  return;
169  WorkList.push_back(&MI);
170  }
171 
172 private:
173  Function &Func;
176  DominatorTree *DT;
177  bool Changed;
178  std::vector<MemIntrinsic *> WorkList;
179  // Start of the previse range.
180  int64_t PreciseRangeStart;
181  // Last value of the previse range.
182  int64_t PreciseRangeLast;
183  // The space to read the profile annotation.
184  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
185  bool perform(MemIntrinsic *MI);
186 
187  // This kind shows which group the value falls in. For PreciseValue, we have
188  // the profile count for that value. LargeGroup groups the values that are in
189  // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
190  enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
191 
192  MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
193  if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
194  return LargeGroup;
195  if (Value == PreciseRangeLast + 1)
196  return NonLargeGroup;
197  return PreciseValue;
198  }
199 };
200 
201 static const char *getMIName(const MemIntrinsic *MI) {
202  switch (MI->getIntrinsicID()) {
203  case Intrinsic::memcpy:
204  return "memcpy";
205  case Intrinsic::memmove:
206  return "memmove";
207  case Intrinsic::memset:
208  return "memset";
209  default:
210  return "unknown";
211  }
212 }
213 
214 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
215  assert(Count <= TotalCount);
216  if (Count < MemOPCountThreshold)
217  return false;
218  if (Count < TotalCount * MemOPPercentThreshold / 100)
219  return false;
220  return true;
221 }
222 
223 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
224  uint64_t Denom) {
225  if (!MemOPScaleCount)
226  return Count;
227  bool Overflowed;
228  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
229  return ScaleCount / Denom;
230 }
231 
232 bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
233  assert(MI);
234  if (MI->getIntrinsicID() == Intrinsic::memmove)
235  return false;
236 
238  uint64_t TotalCount;
239  if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
240  ValueDataArray.get(), NumVals, TotalCount))
241  return false;
242 
243  uint64_t ActualCount = TotalCount;
244  uint64_t SavedTotalCount = TotalCount;
245  if (MemOPScaleCount) {
246  auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
247  if (!BBEdgeCount)
248  return false;
249  ActualCount = *BBEdgeCount;
250  }
251 
252  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
253  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
254  << ActualCount << "\n");
255  LLVM_DEBUG(
256  for (auto &VD
257  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
258 
259  if (ActualCount < MemOPCountThreshold)
260  return false;
261  // Skip if the total value profiled count is 0, in which case we can't
262  // scale up the counts properly (and there is no profitable transformation).
263  if (TotalCount == 0)
264  return false;
265 
266  TotalCount = ActualCount;
267  if (MemOPScaleCount)
268  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
269  << " denominator = " << SavedTotalCount << "\n");
270 
271  // Keeping track of the count of the default case:
272  uint64_t RemainCount = TotalCount;
273  uint64_t SavedRemainCount = SavedTotalCount;
275  SmallVector<uint64_t, 16> CaseCounts;
276  uint64_t MaxCount = 0;
277  unsigned Version = 0;
278  // Default case is in the front -- save the slot here.
279  CaseCounts.push_back(0);
280  for (auto &VD : VDs) {
281  int64_t V = VD.Value;
282  uint64_t C = VD.Count;
283  if (MemOPScaleCount)
284  C = getScaledCount(C, ActualCount, SavedTotalCount);
285 
286  // Only care precise value here.
287  if (getMemOPSizeKind(V) != PreciseValue)
288  continue;
289 
290  // ValueCounts are sorted on the count. Break at the first un-profitable
291  // value.
292  if (!isProfitable(C, RemainCount))
293  break;
294 
295  SizeIds.push_back(V);
296  CaseCounts.push_back(C);
297  if (C > MaxCount)
298  MaxCount = C;
299 
300  assert(RemainCount >= C);
301  RemainCount -= C;
302  assert(SavedRemainCount >= VD.Count);
303  SavedRemainCount -= VD.Count;
304 
305  if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
306  break;
307  }
308 
309  if (Version == 0)
310  return false;
311 
312  CaseCounts[0] = RemainCount;
313  if (RemainCount > MaxCount)
314  MaxCount = RemainCount;
315 
316  uint64_t SumForOpt = TotalCount - RemainCount;
317 
318  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
319  << " Versions (covering " << SumForOpt << " out of "
320  << TotalCount << ")\n");
321 
322  // mem_op(..., size)
323  // ==>
324  // switch (size) {
325  // case s1:
326  // mem_op(..., s1);
327  // goto merge_bb;
328  // case s2:
329  // mem_op(..., s2);
330  // goto merge_bb;
331  // ...
332  // default:
333  // mem_op(..., size);
334  // goto merge_bb;
335  // }
336  // merge_bb:
337 
338  BasicBlock *BB = MI->getParent();
339  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
340  LLVM_DEBUG(dbgs() << *BB << "\n");
341  auto OrigBBFreq = BFI.getBlockFreq(BB);
342 
343  BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
344  BasicBlock::iterator It(*MI);
345  ++It;
346  assert(It != DefaultBB->end());
347  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
348  MergeBB->setName("MemOP.Merge");
349  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
350  DefaultBB->setName("MemOP.Default");
351 
353  auto &Ctx = Func.getContext();
354  IRBuilder<> IRB(BB);
356  Value *SizeVar = MI->getLength();
357  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
358 
359  // Clear the value profile data.
360  MI->setMetadata(LLVMContext::MD_prof, nullptr);
361  // If all promoted, we don't need the MD.prof metadata.
362  if (SavedRemainCount > 0 || Version != NumVals)
363  // Otherwise we need update with the un-promoted records back.
364  annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version),
365  SavedRemainCount, IPVK_MemOPSize, NumVals);
366 
367  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
368 
369  std::vector<DominatorTree::UpdateType> Updates;
370  if (DT)
371  Updates.reserve(2 * SizeIds.size());
372 
373  for (uint64_t SizeId : SizeIds) {
374  BasicBlock *CaseBB = BasicBlock::Create(
375  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
376  Instruction *NewInst = MI->clone();
377  // Fix the argument.
378  MemIntrinsic * MemI = dyn_cast<MemIntrinsic>(NewInst);
379  IntegerType *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType());
380  assert(SizeType && "Expected integer type size argument.");
381  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
382  MemI->setLength(CaseSizeId);
383  CaseBB->getInstList().push_back(NewInst);
384  IRBuilder<> IRBCase(CaseBB);
385  IRBCase.CreateBr(MergeBB);
386  SI->addCase(CaseSizeId, CaseBB);
387  if (DT) {
388  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
389  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
390  }
391  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
392  }
393  DTU.applyUpdates(Updates);
394  Updates.clear();
395 
396  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
397 
398  LLVM_DEBUG(dbgs() << *BB << "\n");
399  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
400  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
401 
402  ORE.emit([&]() {
403  using namespace ore;
404  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI)
405  << "optimized " << NV("Intrinsic", StringRef(getMIName(MI)))
406  << " with count " << NV("Count", SumForOpt) << " out of "
407  << NV("Total", TotalCount) << " for " << NV("Versions", Version)
408  << " versions";
409  });
410 
411  return true;
412 }
413 } // namespace
414 
417  DominatorTree *DT) {
418  if (DisableMemOPOPT)
419  return false;
420 
421  if (F.hasFnAttribute(Attribute::OptimizeForSize))
422  return false;
423  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
424  MemOPSizeOpt.perform();
425  return MemOPSizeOpt.isChanged();
426 }
427 
430  getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
431  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
432  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
433  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
434  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
435 }
436 
437 namespace llvm {
439 
442  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
444  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
445  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
446  if (!Changed)
447  return PreservedAnalyses::all();
448  auto PA = PreservedAnalyses();
449  PA.preserve<GlobalsAA>();
450  PA.preserve<DominatorTreeAnalysis>();
451  return PA;
452 }
453 } // namespace llvm
Legacy wrapper pass to provide the GlobalsAAResult object.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
void push_back(const T &Elt)
Definition: SmallVector.h:218
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Base class for instruction visitors.
Definition: InstVisitor.h:81
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This is the interface for a simple mod/ref and alias analysis over globals.
FunctionPass * createPGOMemOPSizeOptLegacyPass()
#define DEBUG_TYPE
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
cl::opt< unsigned > MemOPSizeLarge
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
Value * getLength() const
std::enable_if< std::is_unsigned< T >::value, T >::type SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:793
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT)
Legacy analysis pass which computes BlockFrequencyInfo.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:295
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
This file provides the interface for IR based instrumentation passes ( (profile-gen, and profile-use).
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
void setLength(Value *L)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
pgo memop opt
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast)
Definition: InstrProf.cpp:994
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC)
Extract the value profile data from Inst which is annotated with value profile meta data...
Definition: InstrProf.cpp:857
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
Class to represent integer types.
Definition: DerivedTypes.h:40
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:812
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:329
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1023
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
iterator end()
Definition: BasicBlock.h:266
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates, bool ForceRemoveDuplicates=false)
Apply updates on all available trees.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
void push_back(pointer val)
Definition: ilist.h:313
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:865
pgo instr Read PGO instrumentation profile
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite"))
char & PGOMemOPSizeOptID
cl::opt< std::string > MemOPSizeRange
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:789
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry &)
Multiway switch.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional &#39;br label X&#39; instruction.
Definition: IRBuilder.h:836
OptimizationRemarkEmitter legacy analysis pass.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const uint64_t Version
Definition: InstrProf.h:895
The optimization diagnostic interface.
const BasicBlock * getParent() const
Definition: Instruction.h:67