LLVM  15.0.0git
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/InitializePasses.h"
15 
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Transforms/Scalar.h"
31 #include "llvm/Transforms/Utils.h"
33 
34 #define DEBUG_TYPE "loop-data-prefetch"
35 
36 using namespace llvm;
37 
38 // Command-line overrides for the prefetching heuristics. When one of these
39 // options is not given, LoopDataPrefetch falls back to the target's TTI hook.
40 static cl::opt<bool>
41 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
42  cl::desc("Prefetch write addresses"));
43 
44 static cl::opt<unsigned>
45  PrefetchDistance("prefetch-distance",
46  cl::desc("Number of instructions to prefetch ahead"),
47  cl::Hidden);
48 
49 static cl::opt<unsigned>
50  MinPrefetchStride("min-prefetch-stride",
51  cl::desc("Min stride to add prefetches"), cl::Hidden);
52 
54  "max-prefetch-iters-ahead",
55  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
56 
57 STATISTIC(NumPrefetches, "Number of prefetches inserted");
58 
59 namespace {
60 
61 /// Loop prefetch implementation class.
62 class LoopDataPrefetch {
63 public:
64  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
67  : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
68 
69  bool run();
70 
71 private:
72  bool runOnLoop(Loop *L);
73 
74  /// Check if the stride of the accesses is large enough to
75  /// warrant a prefetch.
76  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
77 
78  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
79  unsigned NumStridedMemAccesses,
80  unsigned NumPrefetches,
81  bool HasCall) {
82  if (MinPrefetchStride.getNumOccurrences() > 0)
83  return MinPrefetchStride;
84  return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
85  NumPrefetches, HasCall);
86  }
87 
88  unsigned getPrefetchDistance() {
89  if (PrefetchDistance.getNumOccurrences() > 0)
90  return PrefetchDistance;
91  return TTI->getPrefetchDistance();
92  }
93 
94  unsigned getMaxPrefetchIterationsAhead() {
95  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
98  }
99 
100  bool doPrefetchWrites() {
102  return PrefetchWrites;
103  return TTI->enableWritePrefetching();
104  }
105 
106  AssumptionCache *AC;
107  DominatorTree *DT;
108  LoopInfo *LI;
109  ScalarEvolution *SE;
110  const TargetTransformInfo *TTI;
112 };
113 
114 /// Legacy class for inserting loop data prefetches.
115 class LoopDataPrefetchLegacyPass : public FunctionPass {
116 public:
117  static char ID; // Pass ID, replacement for typeid
118  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
120  }
121 
122  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  }
135 
136  bool runOnFunction(Function &F) override;
137  };
138 }
139 
141 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
142  "Loop Data Prefetch", false, false)
146 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
149 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
151 
153  return new LoopDataPrefetchLegacyPass();
154 }
155 
156 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
157  unsigned TargetMinStride) {
158  // No need to check if any stride goes.
159  if (TargetMinStride <= 1)
160  return true;
161 
162  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
163  // If MinStride is set, don't prefetch unless we can ensure that stride is
164  // larger.
165  if (!ConstStride)
166  return false;
167 
168  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
169  return TargetMinStride <= AbsStride;
170 }
171 
175  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
181 
182  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
183  bool Changed = LDP.run();
184 
185  if (Changed) {
188  PA.preserve<LoopAnalysis>();
189  return PA;
190  }
191 
192  return PreservedAnalyses::all();
193 }
194 
196  if (skipFunction(F))
197  return false;
198 
199  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
201  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202  AssumptionCache *AC =
203  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
205  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
206  const TargetTransformInfo *TTI =
207  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
208 
209  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
210  return LDP.run();
211 }
212 
213 bool LoopDataPrefetch::run() {
214  // If PrefetchDistance is not set, don't run the pass. This gives an
215  // opportunity for targets to run this pass for selected subtargets only
216  // (whose TTI sets PrefetchDistance).
217  if (getPrefetchDistance() == 0)
218  return false;
219  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
220 
221  bool MadeChange = false;
222 
223  for (Loop *I : *LI)
224  for (Loop *L : depth_first(I))
225  MadeChange |= runOnLoop(L);
226 
227  return MadeChange;
228 }
229 
230 /// A record for a potential prefetch made during the initial scan of the
231 /// loop. This is used to let a single prefetch target multiple memory accesses.
232 struct Prefetch {
233  /// The address formula for this prefetch as returned by ScalarEvolution.
235  /// The point of insertion for the prefetch instruction.
236  Instruction *InsertPt = nullptr;
237  /// True if targeting a write memory access.
238  bool Writes = false;
239  /// The (first seen) prefetched instruction.
240  Instruction *MemI = nullptr;
241 
242  /// Constructor to create a new Prefetch for \p I.
243  Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
244  addInstruction(I);
245  };
246 
247  /// Add the instruction \param I to this prefetch. If it's not the first
248  /// one, 'InsertPt' and 'Writes' will be updated as required.
249  /// \param PtrDiff the known constant address difference to the first added
250  /// instruction.
252  int64_t PtrDiff = 0) {
253  if (!InsertPt) {
254  MemI = I;
255  InsertPt = I;
256  Writes = isa<StoreInst>(I);
257  } else {
258  BasicBlock *PrefBB = InsertPt->getParent();
259  BasicBlock *InsBB = I->getParent();
260  if (PrefBB != InsBB) {
261  BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
262  if (DomBB != PrefBB)
263  InsertPt = DomBB->getTerminator();
264  }
265 
266  if (isa<StoreInst>(I) && PtrDiff == 0)
267  Writes = true;
268  }
269  }
270 };
271 
272 bool LoopDataPrefetch::runOnLoop(Loop *L) {
273  bool MadeChange = false;
274 
275  // Only prefetch in the inner-most loop
276  if (!L->isInnermost())
277  return MadeChange;
278 
280  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
281 
282  // Calculate the number of iterations ahead to prefetch
284  bool HasCall = false;
285  for (const auto BB : L->blocks()) {
286  // If the loop already has prefetches, then assume that the user knows
287  // what they are doing and don't add any more.
288  for (auto &I : *BB) {
289  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
290  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
291  if (F->getIntrinsicID() == Intrinsic::prefetch)
292  return MadeChange;
293  if (TTI->isLoweredToCall(F))
294  HasCall = true;
295  } else { // indirect call.
296  HasCall = true;
297  }
298  }
299  }
300  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
301  }
302  unsigned LoopSize = Metrics.NumInsts;
303  if (!LoopSize)
304  LoopSize = 1;
305 
306  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
307  if (!ItersAhead)
308  ItersAhead = 1;
309 
310  if (ItersAhead > getMaxPrefetchIterationsAhead())
311  return MadeChange;
312 
313  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
314  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
315  return MadeChange;
316 
317  unsigned NumMemAccesses = 0;
318  unsigned NumStridedMemAccesses = 0;
319  SmallVector<Prefetch, 16> Prefetches;
320  for (const auto BB : L->blocks())
321  for (auto &I : *BB) {
322  Value *PtrValue;
323  Instruction *MemI;
324 
325  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
326  MemI = LMemI;
327  PtrValue = LMemI->getPointerOperand();
328  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
329  if (!doPrefetchWrites()) continue;
330  MemI = SMemI;
331  PtrValue = SMemI->getPointerOperand();
332  } else continue;
333 
334  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
335  if (PtrAddrSpace)
336  continue;
337  NumMemAccesses++;
338  if (L->isLoopInvariant(PtrValue))
339  continue;
340 
341  const SCEV *LSCEV = SE->getSCEV(PtrValue);
342  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
343  if (!LSCEVAddRec)
344  continue;
345  NumStridedMemAccesses++;
346 
347  // We don't want to double prefetch individual cache lines. If this
348  // access is known to be within one cache line of some other one that
349  // has already been prefetched, then don't prefetch this one as well.
350  bool DupPref = false;
351  for (auto &Pref : Prefetches) {
352  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
353  if (const SCEVConstant *ConstPtrDiff =
354  dyn_cast<SCEVConstant>(PtrDiff)) {
355  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
356  if (PD < (int64_t) TTI->getCacheLineSize()) {
357  Pref.addInstruction(MemI, DT, PD);
358  DupPref = true;
359  break;
360  }
361  }
362  }
363  if (!DupPref)
364  Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
365  }
366 
367  unsigned TargetMinStride =
368  getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
369  Prefetches.size(), HasCall);
370 
371  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
372  << " iterations ahead (loop size: " << LoopSize << ") in "
373  << L->getHeader()->getParent()->getName() << ": " << *L);
374  LLVM_DEBUG(dbgs() << "Loop has: "
375  << NumMemAccesses << " memory accesses, "
376  << NumStridedMemAccesses << " strided memory accesses, "
377  << Prefetches.size() << " potential prefetch(es), "
378  << "a minimum stride of " << TargetMinStride << ", "
379  << (HasCall ? "calls" : "no calls") << ".\n");
380 
381  for (auto &P : Prefetches) {
382  // Check if the stride of the accesses is large enough to warrant a
383  // prefetch.
384  if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
385  continue;
386 
387  const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
388  SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
389  P.LSCEVAddRec->getStepRecurrence(*SE)));
390  if (!isSafeToExpand(NextLSCEV, *SE))
391  continue;
392 
393  BasicBlock *BB = P.InsertPt->getParent();
394  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
395  SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
396  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
397 
398  IRBuilder<> Builder(P.InsertPt);
399  Module *M = BB->getParent()->getParent();
400  Type *I32 = Type::getInt32Ty(BB->getContext());
401  Function *PrefetchFunc = Intrinsic::getDeclaration(
402  M, Intrinsic::prefetch, PrefPtrValue->getType());
403  Builder.CreateCall(
404  PrefetchFunc,
405  {PrefPtrValue,
406  ConstantInt::get(I32, P.Writes),
407  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
408  ++NumPrefetches;
409  LLVM_DEBUG(dbgs() << " Access: "
410  << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
411  << ", SCEV: " << *P.LSCEVAddRec << "\n");
412  ORE->emit([&]() {
413  return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
414  << "prefetched memory access";
415  });
416 
417  MadeChange = true;
418  }
419 
420  return MadeChange;
421 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2461
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2617
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1410
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
MinPrefetchStride
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:787
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:445
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::initializeLoopDataPrefetchLegacyPassPass
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
Prefetch
loop data Loop Data Prefetch
Definition: LoopDataPrefetch.cpp:150
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
MaxPrefetchIterationsAhead
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3051
CommandLine.h
CodeMetrics.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:232
false
Definition: StackSlotColoring.cpp:141
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:669
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4407
PrefetchDistance
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopDataPrefetch.cpp:34
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2517
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PrefetchWrites
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7698
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopDataPrefetchPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: LoopDataPrefetch.cpp:172
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:279
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition: PassAnalysisSupport.h:88
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:462
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Prefetch::LSCEVAddRec
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition: LoopDataPrefetch.cpp:234
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:680
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
I32
@ I32
Definition: DXILOpLowering.cpp:40
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Prefetch::Prefetch
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition: LoopDataPrefetch.cpp:243
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:655
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition: LoopSimplify.cpp:794
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4524
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
Prefetch::addInstruction
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition: LoopDataPrefetch.cpp:251
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:673
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2454
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:277
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:405
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1281
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:684
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
LoopDataPrefetch.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1246
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition: LoopDataPrefetch.cpp:152
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37