LLVM  9.0.0svn
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
15 #define DEBUG_TYPE "loop-data-prefetch"
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Module.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Transforms/Scalar.h"
35 using namespace llvm;
36 
37 // By default, we limit this to creating 16 PHIs (which is a little over half
38 // of the allocatable register set).
39 static cl::opt<bool>
40 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
41  cl::desc("Prefetch write addresses"));
42 
43 static cl::opt<unsigned>
44  PrefetchDistance("prefetch-distance",
45  cl::desc("Number of instructions to prefetch ahead"),
46  cl::Hidden);
47 
48 static cl::opt<unsigned>
49  MinPrefetchStride("min-prefetch-stride",
50  cl::desc("Min stride to add prefetches"), cl::Hidden);
51 
53  "max-prefetch-iters-ahead",
54  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
55 
56 STATISTIC(NumPrefetches, "Number of prefetches inserted");
57 
58 namespace {
59 
60 /// Loop prefetch implementation class.
61 class LoopDataPrefetch {
62 public:
63  LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE,
64  const TargetTransformInfo *TTI,
66  : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
67 
68  bool run();
69 
70 private:
71  bool runOnLoop(Loop *L);
72 
73  /// Check if the stride of the accesses is large enough to
74  /// warrant a prefetch.
75  bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
76 
77  unsigned getMinPrefetchStride() {
78  if (MinPrefetchStride.getNumOccurrences() > 0)
79  return MinPrefetchStride;
80  return TTI->getMinPrefetchStride();
81  }
82 
83  unsigned getPrefetchDistance() {
84  if (PrefetchDistance.getNumOccurrences() > 0)
85  return PrefetchDistance;
86  return TTI->getPrefetchDistance();
87  }
88 
89  unsigned getMaxPrefetchIterationsAhead() {
90  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
92  return TTI->getMaxPrefetchIterationsAhead();
93  }
94 
95  AssumptionCache *AC;
96  LoopInfo *LI;
97  ScalarEvolution *SE;
98  const TargetTransformInfo *TTI;
100 };
101 
102 /// Legacy class for inserting loop data prefetches.
103 class LoopDataPrefetchLegacyPass : public FunctionPass {
104 public:
105  static char ID; // Pass ID, replacement for typeid
106  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
108  }
109 
110  void getAnalysisUsage(AnalysisUsage &AU) const override {
119  }
120 
121  bool runOnFunction(Function &F) override;
122  };
123 }
124 
126 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
127  "Loop Data Prefetch", false, false)
133 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
134  "Loop Data Prefetch", false, false)
135 
137  return new LoopDataPrefetchLegacyPass();
138 }
139 
140 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
141  unsigned TargetMinStride = getMinPrefetchStride();
142  // No need to check if any stride goes.
143  if (TargetMinStride <= 1)
144  return true;
145 
146  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
147  // If MinStride is set, don't prefetch unless we can ensure that stride is
148  // larger.
149  if (!ConstStride)
150  return false;
151 
152  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
153  return TargetMinStride <= AbsStride;
154 }
155 
158  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
163  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
164 
165  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
166  bool Changed = LDP.run();
167 
168  if (Changed) {
171  PA.preserve<LoopAnalysis>();
172  return PA;
173  }
174 
175  return PreservedAnalyses::all();
176 }
177 
179  if (skipFunction(F))
180  return false;
181 
182  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
183  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
184  AssumptionCache *AC =
185  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
187  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
188  const TargetTransformInfo *TTI =
189  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
190 
191  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
192  return LDP.run();
193 }
194 
195 bool LoopDataPrefetch::run() {
196  // If PrefetchDistance is not set, don't run the pass. This gives an
197  // opportunity for targets to run this pass for selected subtargets only
198  // (whose TTI sets PrefetchDistance).
199  if (getPrefetchDistance() == 0)
200  return false;
201  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
202 
203  bool MadeChange = false;
204 
205  for (Loop *I : *LI)
206  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
207  MadeChange |= runOnLoop(*L);
208 
209  return MadeChange;
210 }
211 
212 bool LoopDataPrefetch::runOnLoop(Loop *L) {
213  bool MadeChange = false;
214 
215  // Only prefetch in the inner-most loop
216  if (!L->empty())
217  return MadeChange;
218 
220  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
221 
222  // Calculate the number of iterations ahead to prefetch
224  for (const auto BB : L->blocks()) {
225  // If the loop already has prefetches, then assume that the user knows
226  // what they are doing and don't add any more.
227  for (auto &I : *BB)
228  if (CallInst *CI = dyn_cast<CallInst>(&I))
229  if (Function *F = CI->getCalledFunction())
231  return MadeChange;
232 
233  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
234  }
235  unsigned LoopSize = Metrics.NumInsts;
236  if (!LoopSize)
237  LoopSize = 1;
238 
239  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
240  if (!ItersAhead)
241  ItersAhead = 1;
242 
243  if (ItersAhead > getMaxPrefetchIterationsAhead())
244  return MadeChange;
245 
246  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
247  << " iterations ahead (loop size: " << LoopSize << ") in "
248  << L->getHeader()->getParent()->getName() << ": " << *L);
249 
251  for (const auto BB : L->blocks()) {
252  for (auto &I : *BB) {
253  Value *PtrValue;
254  Instruction *MemI;
255 
256  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
257  MemI = LMemI;
258  PtrValue = LMemI->getPointerOperand();
259  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
260  if (!PrefetchWrites) continue;
261  MemI = SMemI;
262  PtrValue = SMemI->getPointerOperand();
263  } else continue;
264 
265  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
266  if (PtrAddrSpace)
267  continue;
268 
269  if (L->isLoopInvariant(PtrValue))
270  continue;
271 
272  const SCEV *LSCEV = SE->getSCEV(PtrValue);
273  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
274  if (!LSCEVAddRec)
275  continue;
276 
277  // Check if the stride of the accesses is large enough to warrant a
278  // prefetch.
279  if (!isStrideLargeEnough(LSCEVAddRec))
280  continue;
281 
282  // We don't want to double prefetch individual cache lines. If this load
283  // is known to be within one cache line of some other load that has
284  // already been prefetched, then don't prefetch this one as well.
285  bool DupPref = false;
286  for (const auto &PrefLoad : PrefLoads) {
287  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
288  if (const SCEVConstant *ConstPtrDiff =
289  dyn_cast<SCEVConstant>(PtrDiff)) {
290  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
291  if (PD < (int64_t) TTI->getCacheLineSize()) {
292  DupPref = true;
293  break;
294  }
295  }
296  }
297  if (DupPref)
298  continue;
299 
300  const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
301  SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
302  LSCEVAddRec->getStepRecurrence(*SE)));
303  if (!isSafeToExpand(NextLSCEV, *SE))
304  continue;
305 
306  PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
307 
308  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
309  SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
310  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
311 
312  IRBuilder<> Builder(MemI);
313  Module *M = BB->getParent()->getParent();
314  Type *I32 = Type::getInt32Ty(BB->getContext());
315  Function *PrefetchFunc =
317  Builder.CreateCall(
318  PrefetchFunc,
319  {PrefPtrValue,
320  ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
321  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
322  ++NumPrefetches;
323  LLVM_DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
324  << "\n");
325  ORE->emit([&]() {
326  return OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)
327  << "prefetched memory access";
328  });
329 
330  MadeChange = true;
331  }
332  }
333 
334  return MadeChange;
335 }
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:580
An instruction for reading from memory.
Definition: Instructions.h:167
loop data prefetch
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1152
BlockT * getHeader() const
Definition: LoopInfo.h:102
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
This node represents a polynomial recurrence on the trip count of the specified loop.
An instruction for storing to memory.
Definition: Instructions.h:320
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1043
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
df_iterator< T > df_end(const T &G)
Machine Trace Metrics
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
This file provides the interface for LLVM&#39;s Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1223
bool mayReadFromMemory() const
Return true if this instruction may read memory.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2193
bool empty() const
Definition: LoopInfo.h:148
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
OptimizationRemarkEmitter legacy analysis pass.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1177
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
#define DEBUG_TYPE
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
The optimization diagnostic interface.
This class represents a constant integer value.