LLVM  6.0.0svn
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a Loop Data Prefetching Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
16 #define DEBUG_TYPE "loop-data-prefetch"
18 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/IR/CFG.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Module.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Transforms/Scalar.h"
41 using namespace llvm;
42 
43 // By default, we limit this to creating 16 PHIs (which is a little over half
44 // of the allocatable register set).
45 static cl::opt<bool>
46 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
47  cl::desc("Prefetch write addresses"));
48 
49 static cl::opt<unsigned>
50  PrefetchDistance("prefetch-distance",
51  cl::desc("Number of instructions to prefetch ahead"),
52  cl::Hidden);
53 
54 static cl::opt<unsigned>
55  MinPrefetchStride("min-prefetch-stride",
56  cl::desc("Min stride to add prefetches"), cl::Hidden);
57 
59  "max-prefetch-iters-ahead",
60  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
61 
62 STATISTIC(NumPrefetches, "Number of prefetches inserted");
63 
64 namespace {
65 
66 /// Loop prefetch implementation class.
67 class LoopDataPrefetch {
68 public:
69  LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE,
70  const TargetTransformInfo *TTI,
72  : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
73 
74  bool run();
75 
76 private:
77  bool runOnLoop(Loop *L);
78 
79  /// \brief Check if the the stride of the accesses is large enough to
80  /// warrant a prefetch.
81  bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
82 
83  unsigned getMinPrefetchStride() {
84  if (MinPrefetchStride.getNumOccurrences() > 0)
85  return MinPrefetchStride;
86  return TTI->getMinPrefetchStride();
87  }
88 
89  unsigned getPrefetchDistance() {
90  if (PrefetchDistance.getNumOccurrences() > 0)
91  return PrefetchDistance;
92  return TTI->getPrefetchDistance();
93  }
94 
95  unsigned getMaxPrefetchIterationsAhead() {
96  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
98  return TTI->getMaxPrefetchIterationsAhead();
99  }
100 
101  AssumptionCache *AC;
102  LoopInfo *LI;
103  ScalarEvolution *SE;
104  const TargetTransformInfo *TTI;
106 };
107 
108 /// Legacy class for inserting loop data prefetches.
109 class LoopDataPrefetchLegacyPass : public FunctionPass {
110 public:
111  static char ID; // Pass ID, replacement for typeid
112  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
114  }
115 
116  void getAnalysisUsage(AnalysisUsage &AU) const override {
125  }
126 
127  bool runOnFunction(Function &F) override;
128  };
129 }
130 
132 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
133  "Loop Data Prefetch", false, false)
139 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
140  "Loop Data Prefetch", false, false)
141 
143  return new LoopDataPrefetchLegacyPass();
144 }
145 
146 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
147  unsigned TargetMinStride = getMinPrefetchStride();
148  // No need to check if any stride goes.
149  if (TargetMinStride <= 1)
150  return true;
151 
152  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
153  // If MinStride is set, don't prefetch unless we can ensure that stride is
154  // larger.
155  if (!ConstStride)
156  return false;
157 
158  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
159  return TargetMinStride <= AbsStride;
160 }
161 
164  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
169  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
170 
171  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
172  bool Changed = LDP.run();
173 
174  if (Changed) {
177  PA.preserve<LoopAnalysis>();
178  return PA;
179  }
180 
181  return PreservedAnalyses::all();
182 }
183 
185  if (skipFunction(F))
186  return false;
187 
188  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
189  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
190  AssumptionCache *AC =
191  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
193  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
194  const TargetTransformInfo *TTI =
195  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
196 
197  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
198  return LDP.run();
199 }
200 
201 bool LoopDataPrefetch::run() {
202  // If PrefetchDistance is not set, don't run the pass. This gives an
203  // opportunity for targets to run this pass for selected subtargets only
204  // (whose TTI sets PrefetchDistance).
205  if (getPrefetchDistance() == 0)
206  return false;
207  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
208 
209  bool MadeChange = false;
210 
211  for (Loop *I : *LI)
212  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
213  MadeChange |= runOnLoop(*L);
214 
215  return MadeChange;
216 }
217 
218 bool LoopDataPrefetch::runOnLoop(Loop *L) {
219  bool MadeChange = false;
220 
221  // Only prefetch in the inner-most loop
222  if (!L->empty())
223  return MadeChange;
224 
226  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
227 
228  // Calculate the number of iterations ahead to prefetch
230  for (const auto BB : L->blocks()) {
231  // If the loop already has prefetches, then assume that the user knows
232  // what they are doing and don't add any more.
233  for (auto &I : *BB)
234  if (CallInst *CI = dyn_cast<CallInst>(&I))
235  if (Function *F = CI->getCalledFunction())
237  return MadeChange;
238 
239  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
240  }
241  unsigned LoopSize = Metrics.NumInsts;
242  if (!LoopSize)
243  LoopSize = 1;
244 
245  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
246  if (!ItersAhead)
247  ItersAhead = 1;
248 
249  if (ItersAhead > getMaxPrefetchIterationsAhead())
250  return MadeChange;
251 
252  DEBUG(dbgs() << "Prefetching " << ItersAhead
253  << " iterations ahead (loop size: " << LoopSize << ") in "
254  << L->getHeader()->getParent()->getName() << ": " << *L);
255 
257  for (const auto BB : L->blocks()) {
258  for (auto &I : *BB) {
259  Value *PtrValue;
260  Instruction *MemI;
261 
262  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
263  MemI = LMemI;
264  PtrValue = LMemI->getPointerOperand();
265  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
266  if (!PrefetchWrites) continue;
267  MemI = SMemI;
268  PtrValue = SMemI->getPointerOperand();
269  } else continue;
270 
271  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
272  if (PtrAddrSpace)
273  continue;
274 
275  if (L->isLoopInvariant(PtrValue))
276  continue;
277 
278  const SCEV *LSCEV = SE->getSCEV(PtrValue);
279  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
280  if (!LSCEVAddRec)
281  continue;
282 
283  // Check if the the stride of the accesses is large enough to warrant a
284  // prefetch.
285  if (!isStrideLargeEnough(LSCEVAddRec))
286  continue;
287 
288  // We don't want to double prefetch individual cache lines. If this load
289  // is known to be within one cache line of some other load that has
290  // already been prefetched, then don't prefetch this one as well.
291  bool DupPref = false;
292  for (const auto &PrefLoad : PrefLoads) {
293  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
294  if (const SCEVConstant *ConstPtrDiff =
295  dyn_cast<SCEVConstant>(PtrDiff)) {
296  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
297  if (PD < (int64_t) TTI->getCacheLineSize()) {
298  DupPref = true;
299  break;
300  }
301  }
302  }
303  if (DupPref)
304  continue;
305 
306  const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
307  SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
308  LSCEVAddRec->getStepRecurrence(*SE)));
309  if (!isSafeToExpand(NextLSCEV, *SE))
310  continue;
311 
312  PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
313 
314  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
315  SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
316  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
317 
318  IRBuilder<> Builder(MemI);
319  Module *M = BB->getParent()->getParent();
320  Type *I32 = Type::getInt32Ty(BB->getContext());
322  Builder.CreateCall(
323  PrefetchFunc,
324  {PrefPtrValue,
325  ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
326  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
327  ++NumPrefetches;
328  DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
329  << "\n");
330  ORE->emit([&]() {
331  return OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)
332  << "prefetched memory access";
333  });
334 
335  MadeChange = true;
336  }
337  }
338 
339  return MadeChange;
340 }
341 
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:238
F(f)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
loop data prefetch
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
This is the interface for a SCEV-based alias analysis.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:907
BlockT * getHeader() const
Definition: LoopInfo.h:100
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
An instruction for storing to memory.
Definition: Instructions.h:306
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:980
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
df_iterator< T > df_end(const T &G)
Machine Trace Metrics
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
This file provides the interface for LLVM&#39;s Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:420
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool mayReadFromMemory() const
Return true if this instruction may read memory.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
bool empty() const
Definition: LoopInfo.h:146
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
OptimizationRemarkEmitter legacy analysis pass.
#define DEBUG(X)
Definition: Debug.h:118
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:932
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
#define DEBUG_TYPE
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
The optimization diagnostic interface.
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1663