LLVM  14.0.0git
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/InitializePasses.h"
15 
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/CFG.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/Module.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Transforms/Scalar.h"
32 #include "llvm/Transforms/Utils.h"
36 
37 #define DEBUG_TYPE "loop-data-prefetch"
38 
39 using namespace llvm;
40 
41 // Whether to also prefetch the addresses written by stores. The flag itself
42 // defaults to false; doPrefetchWrites() decides how it is combined with TTI.
43 static cl::opt<bool>
44 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
45  cl::desc("Prefetch write addresses"));
46 
47 static cl::opt<unsigned>
48  PrefetchDistance("prefetch-distance",
49  cl::desc("Number of instructions to prefetch ahead"),
50  cl::Hidden);
51 
52 static cl::opt<unsigned>
53  MinPrefetchStride("min-prefetch-stride",
54  cl::desc("Min stride to add prefetches"), cl::Hidden);
55 
57  "max-prefetch-iters-ahead",
58  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
59 
60 STATISTIC(NumPrefetches, "Number of prefetches inserted");
61 
62 namespace {
63 
64 /// Loop prefetch implementation class.
///
/// Stores the analysis pointers handed to the constructor; both pass entry
/// points in this file build one of these and call run().
65 class LoopDataPrefetch {
66 public:
67  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
70  : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
71 
 /// Visit the loops recorded in LI and return true if any of them was
 /// changed.
72  bool run();
73 
74 private:
 /// Try to insert prefetches into \p L; returns true if the IR was changed.
75  bool runOnLoop(Loop *L);
76 
77  /// Check if the stride of the accesses is large enough to
78  /// warrant a prefetch.
79  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
80 
 /// Minimum stride that warrants a prefetch: the value of the
 /// min-prefetch-stride option when it was given on the command line,
 /// otherwise whatever the target reports through TTI for these counts.
81  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
82  unsigned NumStridedMemAccesses,
83  unsigned NumPrefetches,
84  bool HasCall) {
85  if (MinPrefetchStride.getNumOccurrences() > 0)
86  return MinPrefetchStride;
87  return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
88  NumPrefetches, HasCall);
89  }
90 
 /// Prefetch distance: the prefetch-distance option when it was given on the
 /// command line, otherwise the target's value from TTI.
91  unsigned getPrefetchDistance() {
92  if (PrefetchDistance.getNumOccurrences() > 0)
93  return PrefetchDistance;
94  return TTI->getPrefetchDistance();
95  }
96 
 /// Upper bound on how many iterations ahead a prefetch may be placed; the
 /// max-prefetch-iters-ahead option is consulted first.
97  unsigned getMaxPrefetchIterationsAhead() {
98  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
101  }
102 
 /// Whether store addresses are prefetched too: answered either by the
 /// PrefetchWrites option or by TTI->enableWritePrefetching().
103  bool doPrefetchWrites() {
105  return PrefetchWrites;
106  return TTI->enableWritePrefetching();
107  }
108 
 // Analyses supplied by the caller; this class only stores the pointers.
109  AssumptionCache *AC;
110  DominatorTree *DT;
111  LoopInfo *LI;
112  ScalarEvolution *SE;
113  const TargetTransformInfo *TTI;
115 };
116 
117 /// Legacy class for inserting loop data prefetches.
118 class LoopDataPrefetchLegacyPass : public FunctionPass {
119 public:
120  static char ID; // Pass ID, replacement for typeid
121  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
123  }
124 
125  void getAnalysisUsage(AnalysisUsage &AU) const override {
137  }
138 
139  bool runOnFunction(Function &F) override;
140  };
141 }
142 
144 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
145  "Loop Data Prefetch", false, false)
149 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
152 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
154 
156  return new LoopDataPrefetchLegacyPass();
157 }
158 
159 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
160  unsigned TargetMinStride) {
161  // No need to check if any stride goes.
162  if (TargetMinStride <= 1)
163  return true;
164 
165  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
166  // If MinStride is set, don't prefetch unless we can ensure that stride is
167  // larger.
168  if (!ConstStride)
169  return false;
170 
171  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
172  return TargetMinStride <= AbsStride;
173 }
174 
178  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
184 
185  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
186  bool Changed = LDP.run();
187 
188  if (Changed) {
191  PA.preserve<LoopAnalysis>();
192  return PA;
193  }
194 
195  return PreservedAnalyses::all();
196 }
197 
199  if (skipFunction(F))
200  return false;
201 
202  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
203  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
204  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
205  AssumptionCache *AC =
206  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
208  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
209  const TargetTransformInfo *TTI =
210  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
211 
212  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
213  return LDP.run();
214 }
215 
216 bool LoopDataPrefetch::run() {
217  // If PrefetchDistance is not set, don't run the pass. This gives an
218  // opportunity for targets to run this pass for selected subtargets only
219  // (whose TTI sets PrefetchDistance).
220  if (getPrefetchDistance() == 0)
221  return false;
222  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
223 
224  bool MadeChange = false;
225 
226  for (Loop *I : *LI)
227  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
228  MadeChange |= runOnLoop(*L);
229 
230  return MadeChange;
231 }
232 
233 /// A record for a potential prefetch made during the initial scan of the
234 /// loop. This is used to let a single prefetch target multiple memory accesses.
235 struct Prefetch {
236  /// The address formula for this prefetch as returned by ScalarEvolution.
238  /// The point of insertion for the prefetch instruction.
240  /// True if targeting a write memory access.
241  bool Writes;
242  /// The (first seen) prefetched instruction.
244 
245  /// Constructor to create a new Prefetch for \p I.
 /// Every member except the address formula starts out empty;
 /// addInstruction() then records \p I as the first access.
247  : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
248  addInstruction(I);
249  };
250 
251  /// Add the instruction \p I to this prefetch. If it's not the first
252  /// one, 'InsertPt' and 'Writes' will be updated as required.
253  /// \param PtrDiff the known constant address difference to the first added
254  /// instruction.
256  int64_t PtrDiff = 0) {
257  if (!InsertPt) {
 // First access: it is both the representative instruction and the place
 // where the prefetch will be inserted.
258  MemI = I;
259  InsertPt = I;
260  Writes = isa<StoreInst>(I);
261  } else {
262  BasicBlock *PrefBB = InsertPt->getParent();
263  BasicBlock *InsBB = I->getParent();
264  if (PrefBB != InsBB) {
 // The accesses live in different blocks: move the insertion point to
 // the terminator of their nearest common dominator, unless that
 // dominator is the block already holding the insertion point.
265  BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
266  if (DomBB != PrefBB)
267  InsertPt = DomBB->getTerminator();
268  }
269 
 // Only a store to exactly the same address (PtrDiff == 0) turns this
 // into a write prefetch.
270  if (isa<StoreInst>(I) && PtrDiff == 0)
271  Writes = true;
272  }
273  }
274 };
275 
/// Insert software prefetches into the innermost loop \p L.
///
/// The loop is scanned for loads (and stores, when doPrefetchWrites() allows
/// it) in address space 0 whose pointer ScalarEvolution models as an
/// add-recurrence. Accesses that lie within one cache line of an earlier
/// candidate share that candidate's prefetch. For every remaining candidate
/// whose stride is large enough, a call to the prefetch intrinsic is emitted
/// for the address ItersAhead iterations in the future.
///
/// \returns true if at least one prefetch call was created.
276 bool LoopDataPrefetch::runOnLoop(Loop *L) {
277  bool MadeChange = false;
278 
279  // Only prefetch in the inner-most loop
280  if (!L->isInnermost())
281  return MadeChange;
282 
284  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
285 
286  // Calculate the number of iterations ahead to prefetch
288  bool HasCall = false;
289  for (const auto BB : L->blocks()) {
290  // If the loop already has prefetches, then assume that the user knows
291  // what they are doing and don't add any more.
292  for (auto &I : *BB) {
293  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
294  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
295  if (F->getIntrinsicID() == Intrinsic::prefetch)
296  return MadeChange;
297  if (TTI->isLoweredToCall(F))
298  HasCall = true;
299  } else { // indirect call.
300  HasCall = true;
301  }
302  }
303  }
304  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
305  }
 // Treat an empty loop as one instruction so the division below is defined.
306  unsigned LoopSize = Metrics.NumInsts;
307  if (!LoopSize)
308  LoopSize = 1;
309 
 // The prefetch distance is divided by the loop's instruction count to get
 // a look-ahead in iterations, with a floor of one iteration.
310  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
311  if (!ItersAhead)
312  ItersAhead = 1;
313 
314  if (ItersAhead > getMaxPrefetchIterationsAhead())
315  return MadeChange;
316 
 // Give up when the known maximum trip count (non-zero) does not exceed the
 // look-ahead.
317  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
318  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
319  return MadeChange;
320 
321  unsigned NumMemAccesses = 0;
322  unsigned NumStridedMemAccesses = 0;
323  SmallVector<Prefetch, 16> Prefetches;
324  for (const auto BB : L->blocks())
325  for (auto &I : *BB) {
326  Value *PtrValue;
327  Instruction *MemI;
328 
329  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
330  MemI = LMemI;
331  PtrValue = LMemI->getPointerOperand();
332  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
333  if (!doPrefetchWrites()) continue;
334  MemI = SMemI;
335  PtrValue = SMemI->getPointerOperand();
336  } else continue;
337 
 // Only pointers in address space 0 are considered.
338  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
339  if (PtrAddrSpace)
340  continue;
341  NumMemAccesses++;
342  if (L->isLoopInvariant(PtrValue))
343  continue;
344 
 // Only addresses that ScalarEvolution expresses as an add-recurrence
 // count as strided accesses.
345  const SCEV *LSCEV = SE->getSCEV(PtrValue);
346  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
347  if (!LSCEVAddRec)
348  continue;
349  NumStridedMemAccesses++;
350 
351  // We don't want to double prefetch individual cache lines. If this
352  // access is known to be within one cache line of some other one that
353  // has already been prefetched, then don't prefetch this one as well.
354  bool DupPref = false;
355  for (auto &Pref : Prefetches) {
356  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
357  if (const SCEVConstant *ConstPtrDiff =
358  dyn_cast<SCEVConstant>(PtrDiff)) {
359  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
360  if (PD < (int64_t) TTI->getCacheLineSize()) {
361  Pref.addInstruction(MemI, DT, PD);
362  DupPref = true;
363  break;
364  }
365  }
366  }
367  if (!DupPref)
368  Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
369  }
370 
371  unsigned TargetMinStride =
372  getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
373  Prefetches.size(), HasCall);
374 
375  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
376  << " iterations ahead (loop size: " << LoopSize << ") in "
377  << L->getHeader()->getParent()->getName() << ": " << *L);
378  LLVM_DEBUG(dbgs() << "Loop has: "
379  << NumMemAccesses << " memory accesses, "
380  << NumStridedMemAccesses << " strided memory accesses, "
381  << Prefetches.size() << " potential prefetch(es), "
382  << "a minimum stride of " << TargetMinStride << ", "
383  << (HasCall ? "calls" : "no calls") << ".\n");
384 
385  for (auto &P : Prefetches) {
386  // Check if the stride of the accesses is large enough to warrant a
387  // prefetch.
388  if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
389  continue;
390 
 // Address to prefetch: the current recurrence plus ItersAhead times its
 // step.
391  const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
392  SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
393  P.LSCEVAddRec->getStepRecurrence(*SE)));
394  if (!isSafeToExpand(NextLSCEV, *SE))
395  continue;
396 
 // Materialise that address as an i8* in address space 0, right before the
 // chosen insertion point.
397  BasicBlock *BB = P.InsertPt->getParent();
398  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
399  SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
400  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
401 
402  IRBuilder<> Builder(P.InsertPt);
403  Module *M = BB->getParent()->getParent();
404  Type *I32 = Type::getInt32Ty(BB->getContext());
405  Function *PrefetchFunc = Intrinsic::getDeclaration(
406  M, Intrinsic::prefetch, PrefPtrValue->getType());
 // Operands after the address: P.Writes as the read/write flag, then the
 // constants 3 and 1 (locality and cache-type operands of llvm.prefetch).
407  Builder.CreateCall(
408  PrefetchFunc,
409  {PrefPtrValue,
410  ConstantInt::get(I32, P.Writes),
411  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
412  ++NumPrefetches;
413  LLVM_DEBUG(dbgs() << " Access: "
414  << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
415  << ", SCEV: " << *P.LSCEVAddRec << "\n");
416  ORE->emit([&]() {
417  return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
418  << "prefetched memory access";
419  });
420 
421  MadeChange = true;
422  }
423 
424  return MadeChange;
425 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2061
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
ValueMapper.h
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2683
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
Scalar.h
MinPrefetchStride
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:65
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::df_end
df_iterator< T > df_end(const T &G)
Definition: DepthFirstIterator.h:223
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:446
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::initializeLoopDataPrefetchLegacyPassPass
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:782
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
Prefetch
loop data Loop Data Prefetch
Definition: LoopDataPrefetch.cpp:153
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
MaxPrefetchIterationsAhead
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3010
CommandLine.h
CodeMetrics.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Prefetch::MemI
Instruction * MemI
The (first seen) prefetched instruction.
Definition: LoopDataPrefetch.cpp:243
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:235
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:641
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2091
Utils.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:118
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4082
PrefetchDistance
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopDataPrefetch.cpp:37
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PrefetchWrites
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7260
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::df_begin
df_iterator< T > df_begin(const T &G)
Definition: DepthFirstIterator.h:218
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Prefetch::InsertPt
Instruction * InsertPt
The point of insertion for the prefetch instruction.
Definition: LoopDataPrefetch.cpp:239
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopDataPrefetchPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: LoopDataPrefetch.cpp:175
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:276
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:152
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Prefetch::Writes
bool Writes
True if targeting a write memory access.
Definition: LoopDataPrefetch.cpp:241
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition: PassAnalysisSupport.h:88
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Prefetch::LSCEVAddRec
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition: LoopDataPrefetch.cpp:237
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:652
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Prefetch::Prefetch
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition: LoopDataPrefetch.cpp:246
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:627
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition: LoopSimplify.cpp:800
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4215
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
Prefetch::addInstruction
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition: LoopDataPrefetch.cpp:255
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:645
TargetTransformInfo.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2424
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:267
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:412
BasicBlockUtils.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:656
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
LoopDataPrefetch.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition: LoopDataPrefetch.cpp:155
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37