LLVM  16.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that optimizes memory intrinsics
10 // such as memcpy using the size value profile. When memory intrinsic size
11 // value profile metadata is available, a single memory intrinsic is expanded
12 // to a sequence of guarded specialized versions that are called with the
13 // hottest size(s), for later expansion into more optimal inline sequences.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/IR/PassManager.h"
35 #include "llvm/IR/Type.h"
37 #define INSTR_PROF_VALUE_PROF_MEMOP_API
39 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Debug.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "pgo-memop-opt"
53 
54 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
55 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
56 
57 // The minimum call count to optimize memory intrinsic calls.
58 static cl::opt<unsigned>
59  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000),
60  cl::desc("The minimum count to optimize memory "
61  "intrinsic calls"));
62 
63 // Command line option to disable memory intrinsic optimization. The default is
64 // false. This is for debug purpose.
65 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
66  cl::Hidden, cl::desc("Disable optimize"));
67 
68 // The percent threshold to optimize memory intrinsic calls.
69 static cl::opt<unsigned>
70  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
71  cl::Hidden,
72  cl::desc("The percentage threshold for the "
73  "memory intrinsic calls optimization"));
74 
75 // Maximum number of versions for optimizing memory intrinsic call.
76 static cl::opt<unsigned>
77  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
78  cl::desc("The max version for the optimized memory "
79  " intrinsic calls"));
80 
81 // Scale the counts from the annotation using the BB count value.
82 static cl::opt<bool>
83  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
84  cl::desc("Scale the memop size counts using the basic "
85  " block count value"));
86 
88  MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
89  cl::Hidden,
90  cl::desc("Size-specialize memcmp and bcmp calls"));
91 
92 static cl::opt<unsigned>
93  MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
94  cl::desc("Optimize the memop size <= this value"));
95 
96 namespace {
97 
98 static const char *getMIName(const MemIntrinsic *MI) {
99  switch (MI->getIntrinsicID()) {
100  case Intrinsic::memcpy:
101  return "memcpy";
102  case Intrinsic::memmove:
103  return "memmove";
104  case Intrinsic::memset:
105  return "memset";
106  default:
107  return "unknown";
108  }
109 }
110 
111 // A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
112 struct MemOp {
113  Instruction *I;
114  MemOp(MemIntrinsic *MI) : I(MI) {}
115  MemOp(CallInst *CI) : I(CI) {}
116  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
117  CallInst *asCI() { return cast<CallInst>(I); }
118  MemOp clone() {
119  if (auto MI = asMI())
120  return MemOp(cast<MemIntrinsic>(MI->clone()));
121  return MemOp(cast<CallInst>(asCI()->clone()));
122  }
123  Value *getLength() {
124  if (auto MI = asMI())
125  return MI->getLength();
126  return asCI()->getArgOperand(2);
127  }
128  void setLength(Value *Length) {
129  if (auto MI = asMI())
130  return MI->setLength(Length);
131  asCI()->setArgOperand(2, Length);
132  }
133  StringRef getFuncName() {
134  if (auto MI = asMI())
135  return MI->getCalledFunction()->getName();
136  return asCI()->getCalledFunction()->getName();
137  }
138  bool isMemmove() {
139  if (auto MI = asMI())
140  if (MI->getIntrinsicID() == Intrinsic::memmove)
141  return true;
142  return false;
143  }
144  bool isMemcmp(TargetLibraryInfo &TLI) {
145  LibFunc Func;
146  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
147  Func == LibFunc_memcmp) {
148  return true;
149  }
150  return false;
151  }
152  bool isBcmp(TargetLibraryInfo &TLI) {
153  LibFunc Func;
154  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
155  Func == LibFunc_bcmp) {
156  return true;
157  }
158  return false;
159  }
160  const char *getName(TargetLibraryInfo &TLI) {
161  if (auto MI = asMI())
162  return getMIName(MI);
163  LibFunc Func;
164  if (TLI.getLibFunc(*asCI(), Func)) {
165  if (Func == LibFunc_memcmp)
166  return "memcmp";
167  if (Func == LibFunc_bcmp)
168  return "bcmp";
169  }
170  llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
171  return nullptr;
172  }
173 };
174 
175 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
176 public:
177  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
179  TargetLibraryInfo &TLI)
180  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
181  ValueDataArray =
182  std::make_unique<InstrProfValueData[]>(INSTR_PROF_NUM_BUCKETS);
183  }
184  bool isChanged() const { return Changed; }
185  void perform() {
186  WorkList.clear();
187  visit(Func);
188 
189  for (auto &MO : WorkList) {
190  ++NumOfPGOMemOPAnnotate;
191  if (perform(MO)) {
192  Changed = true;
193  ++NumOfPGOMemOPOpt;
194  LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
195  << "is Transformed.\n");
196  }
197  }
198  }
199 
200  void visitMemIntrinsic(MemIntrinsic &MI) {
201  Value *Length = MI.getLength();
202  // Not perform on constant length calls.
203  if (isa<ConstantInt>(Length))
204  return;
205  WorkList.push_back(MemOp(&MI));
206  }
207 
208  void visitCallInst(CallInst &CI) {
209  LibFunc Func;
210  if (TLI.getLibFunc(CI, Func) &&
211  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
212  !isa<ConstantInt>(CI.getArgOperand(2))) {
213  WorkList.push_back(MemOp(&CI));
214  }
215  }
216 
217 private:
218  Function &Func;
221  DominatorTree *DT;
222  TargetLibraryInfo &TLI;
223  bool Changed;
224  std::vector<MemOp> WorkList;
225  // The space to read the profile annotation.
226  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
227  bool perform(MemOp MO);
228 };
229 
230 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
231  assert(Count <= TotalCount);
232  if (Count < MemOPCountThreshold)
233  return false;
234  if (Count < TotalCount * MemOPPercentThreshold / 100)
235  return false;
236  return true;
237 }
238 
239 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
240  uint64_t Denom) {
241  if (!MemOPScaleCount)
242  return Count;
243  bool Overflowed;
244  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
245  return ScaleCount / Denom;
246 }
247 
248 bool MemOPSizeOpt::perform(MemOp MO) {
249  assert(MO.I);
250  if (MO.isMemmove())
251  return false;
252  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
253  return false;
254 
255  uint32_t NumVals, MaxNumVals = INSTR_PROF_NUM_BUCKETS;
256  uint64_t TotalCount;
257  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumVals,
258  ValueDataArray.get(), NumVals, TotalCount))
259  return false;
260 
261  uint64_t ActualCount = TotalCount;
262  uint64_t SavedTotalCount = TotalCount;
263  if (MemOPScaleCount) {
264  auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
265  if (!BBEdgeCount)
266  return false;
267  ActualCount = *BBEdgeCount;
268  }
269 
270  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
271  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
272  << ActualCount << "\n");
273  LLVM_DEBUG(
274  for (auto &VD
275  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
276 
277  if (ActualCount < MemOPCountThreshold)
278  return false;
279  // Skip if the total value profiled count is 0, in which case we can't
280  // scale up the counts properly (and there is no profitable transformation).
281  if (TotalCount == 0)
282  return false;
283 
284  TotalCount = ActualCount;
285  if (MemOPScaleCount)
286  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
287  << " denominator = " << SavedTotalCount << "\n");
288 
289  // Keeping track of the count of the default case:
290  uint64_t RemainCount = TotalCount;
291  uint64_t SavedRemainCount = SavedTotalCount;
293  SmallVector<uint64_t, 16> CaseCounts;
294  SmallDenseSet<uint64_t, 16> SeenSizeId;
295  uint64_t MaxCount = 0;
296  unsigned Version = 0;
297  // Default case is in the front -- save the slot here.
298  CaseCounts.push_back(0);
300  for (auto I = VDs.begin(), E = VDs.end(); I != E; ++I) {
301  auto &VD = *I;
302  int64_t V = VD.Value;
303  uint64_t C = VD.Count;
304  if (MemOPScaleCount)
305  C = getScaledCount(C, ActualCount, SavedTotalCount);
306 
307  if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) {
308  RemainingVDs.push_back(VD);
309  continue;
310  }
311 
312  // ValueCounts are sorted on the count. Break at the first un-profitable
313  // value.
314  if (!isProfitable(C, RemainCount)) {
315  RemainingVDs.insert(RemainingVDs.end(), I, E);
316  break;
317  }
318 
319  if (!SeenSizeId.insert(V).second) {
320  errs() << "Invalid Profile Data in Function " << Func.getName()
321  << ": Two identical values in MemOp value counts.\n";
322  return false;
323  }
324 
325  SizeIds.push_back(V);
326  CaseCounts.push_back(C);
327  if (C > MaxCount)
328  MaxCount = C;
329 
330  assert(RemainCount >= C);
331  RemainCount -= C;
332  assert(SavedRemainCount >= VD.Count);
333  SavedRemainCount -= VD.Count;
334 
335  if (++Version >= MemOPMaxVersion && MemOPMaxVersion != 0) {
336  RemainingVDs.insert(RemainingVDs.end(), I + 1, E);
337  break;
338  }
339  }
340 
341  if (Version == 0)
342  return false;
343 
344  CaseCounts[0] = RemainCount;
345  if (RemainCount > MaxCount)
346  MaxCount = RemainCount;
347 
348  uint64_t SumForOpt = TotalCount - RemainCount;
349 
350  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
351  << " Versions (covering " << SumForOpt << " out of "
352  << TotalCount << ")\n");
353 
354  // mem_op(..., size)
355  // ==>
356  // switch (size) {
357  // case s1:
358  // mem_op(..., s1);
359  // goto merge_bb;
360  // case s2:
361  // mem_op(..., s2);
362  // goto merge_bb;
363  // ...
364  // default:
365  // mem_op(..., size);
366  // goto merge_bb;
367  // }
368  // merge_bb:
369 
370  BasicBlock *BB = MO.I->getParent();
371  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
372  LLVM_DEBUG(dbgs() << *BB << "\n");
373  auto OrigBBFreq = BFI.getBlockFreq(BB);
374 
375  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
376  BasicBlock::iterator It(*MO.I);
377  ++It;
378  assert(It != DefaultBB->end());
379  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
380  MergeBB->setName("MemOP.Merge");
381  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
382  DefaultBB->setName("MemOP.Default");
383 
385  auto &Ctx = Func.getContext();
386  IRBuilder<> IRB(BB);
387  BB->getTerminator()->eraseFromParent();
388  Value *SizeVar = MO.getLength();
389  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
390  Type *MemOpTy = MO.I->getType();
391  PHINode *PHI = nullptr;
392  if (!MemOpTy->isVoidTy()) {
393  // Insert a phi for the return values at the merge block.
394  IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
395  PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
396  MO.I->replaceAllUsesWith(PHI);
397  PHI->addIncoming(MO.I, DefaultBB);
398  }
399 
400  // Clear the value profile data.
401  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
402  // If all promoted, we don't need the MD.prof metadata.
403  if (SavedRemainCount > 0 || Version != NumVals) {
404  // Otherwise we need update with the un-promoted records back.
405  ArrayRef<InstrProfValueData> RemVDs(RemainingVDs);
406  annotateValueSite(*Func.getParent(), *MO.I, RemVDs, SavedRemainCount,
407  IPVK_MemOPSize, NumVals);
408  }
409 
410  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
411 
412  std::vector<DominatorTree::UpdateType> Updates;
413  if (DT)
414  Updates.reserve(2 * SizeIds.size());
415 
416  for (uint64_t SizeId : SizeIds) {
417  BasicBlock *CaseBB = BasicBlock::Create(
418  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
419  MemOp NewMO = MO.clone();
420  // Fix the argument.
421  auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
422  assert(SizeType && "Expected integer type size argument.");
423  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
424  NewMO.setLength(CaseSizeId);
425  CaseBB->getInstList().push_back(NewMO.I);
426  IRBuilder<> IRBCase(CaseBB);
427  IRBCase.CreateBr(MergeBB);
428  SI->addCase(CaseSizeId, CaseBB);
429  if (!MemOpTy->isVoidTy())
430  PHI->addIncoming(NewMO.I, CaseBB);
431  if (DT) {
432  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
433  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
434  }
435  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
436  }
437  DTU.applyUpdates(Updates);
438  Updates.clear();
439 
440  if (MaxCount)
441  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
442 
443  LLVM_DEBUG(dbgs() << *BB << "\n");
444  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
445  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
446 
447  ORE.emit([&]() {
448  using namespace ore;
449  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
450  << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
451  << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
452  << " for " << NV("Versions", Version) << " versions";
453  });
454 
455  return true;
456 }
457 } // namespace
458 
461  DominatorTree *DT, TargetLibraryInfo &TLI) {
462  if (DisableMemOPOPT)
463  return false;
464 
465  if (F.hasFnAttribute(Attribute::OptimizeForSize))
466  return false;
467  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
468  MemOPSizeOpt.perform();
469  return MemOPSizeOpt.isChanged();
470 }
471 
477  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
478  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
479  if (!Changed)
480  return PreservedAnalyses::all();
481  auto PA = PreservedAnalyses();
482  PA.preserve<DominatorTreeAnalysis>();
483  return PA;
484 }
llvm::SaturatingMultiply
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:761
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:308
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MemOPCountThreshold
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::Function
Definition: Function.h:60
StringRef.h
MemOpMaxOptSize
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
llvm::SmallVector< uint64_t, 16 >
Statistic.h
ErrorHandling.h
llvm::SmallDenseSet
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:286
llvm::IRBuilder<>
DomTreeUpdater.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::MemOp
Definition: TargetLowering.h:111
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:1041
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::detail::DenseSetImpl< ValueT, SmallDenseMap< ValueT, detail::DenseSetEmpty, 4, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOMemOPSizeOpt.cpp:52
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
InstrProfData.inc
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
PGOMemOPSizeOptImpl
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
Definition: PGOMemOPSizeOpt.cpp:459
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MemOPMaxVersion
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::desc("The max version for the optimized memory " " intrinsic calls"))
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
SI
@ SI
Definition: SIInstrInfo.cpp:7882
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:297
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:1056
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
Type.h
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
uint64_t
MemOPScaleCount
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
PGOInstrumentation.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
ArrayRef.h
IRBuilder.h
llvm::PGOMemOPSizeOpt::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: PGOMemOPSizeOpt.cpp:472
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
InstVisitor.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
uint32_t
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:78
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:432
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1017
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
DisableMemOPOPT
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:793
Casting.h
Function.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:225
MemOPPercentThreshold
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:373
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
Instructions.h
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::PHINode
Definition: Instructions.h:2698
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:2295
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1473
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1062
LLVMContext.h
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3277
llvm::cl::desc
Definition: CommandLine.h:413
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:918
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
MemOPOptMemcmpBcmp
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:449
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809