LLVM  8.0.0svn
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a Loop Data Prefetching Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
16 #define DEBUG_TYPE "loop-data-prefetch"
18 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Module.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Transforms/Scalar.h"
36 using namespace llvm;
37 
38 // By default, we limit this to creating 16 PHIs (which is a little over half
39 // of the allocatable register set).
40 static cl::opt<bool>
41 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
42  cl::desc("Prefetch write addresses"));
43 
44 static cl::opt<unsigned>
45  PrefetchDistance("prefetch-distance",
46  cl::desc("Number of instructions to prefetch ahead"),
47  cl::Hidden);
48 
49 static cl::opt<unsigned>
50  MinPrefetchStride("min-prefetch-stride",
51  cl::desc("Min stride to add prefetches"), cl::Hidden);
52 
54  "max-prefetch-iters-ahead",
55  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
56 
57 STATISTIC(NumPrefetches, "Number of prefetches inserted");
58 
59 namespace {
60 
61 /// Loop prefetch implementation class.
62 class LoopDataPrefetch {
63 public:
64  LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE,
65  const TargetTransformInfo *TTI,
67  : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
68 
69  bool run();
70 
71 private:
72  bool runOnLoop(Loop *L);
73 
74  /// Check if the stride of the accesses is large enough to
75  /// warrant a prefetch.
76  bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
77 
78  unsigned getMinPrefetchStride() {
79  if (MinPrefetchStride.getNumOccurrences() > 0)
80  return MinPrefetchStride;
81  return TTI->getMinPrefetchStride();
82  }
83 
84  unsigned getPrefetchDistance() {
85  if (PrefetchDistance.getNumOccurrences() > 0)
86  return PrefetchDistance;
87  return TTI->getPrefetchDistance();
88  }
89 
90  unsigned getMaxPrefetchIterationsAhead() {
91  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
93  return TTI->getMaxPrefetchIterationsAhead();
94  }
95 
96  AssumptionCache *AC;
97  LoopInfo *LI;
98  ScalarEvolution *SE;
99  const TargetTransformInfo *TTI;
101 };
102 
103 /// Legacy class for inserting loop data prefetches.
104 class LoopDataPrefetchLegacyPass : public FunctionPass {
105 public:
106  static char ID; // Pass ID, replacement for typeid
107  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
109  }
110 
111  void getAnalysisUsage(AnalysisUsage &AU) const override {
120  }
121 
122  bool runOnFunction(Function &F) override;
123  };
124 }
125 
127 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
128  "Loop Data Prefetch", false, false)
134 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
135  "Loop Data Prefetch", false, false)
136 
138  return new LoopDataPrefetchLegacyPass();
139 }
140 
141 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
142  unsigned TargetMinStride = getMinPrefetchStride();
143  // No need to check if any stride goes.
144  if (TargetMinStride <= 1)
145  return true;
146 
147  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
148  // If MinStride is set, don't prefetch unless we can ensure that stride is
149  // larger.
150  if (!ConstStride)
151  return false;
152 
153  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
154  return TargetMinStride <= AbsStride;
155 }
156 
159  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
164  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
165 
166  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
167  bool Changed = LDP.run();
168 
169  if (Changed) {
172  PA.preserve<LoopAnalysis>();
173  return PA;
174  }
175 
176  return PreservedAnalyses::all();
177 }
178 
180  if (skipFunction(F))
181  return false;
182 
183  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
184  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
185  AssumptionCache *AC =
186  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
188  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
189  const TargetTransformInfo *TTI =
190  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
191 
192  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
193  return LDP.run();
194 }
195 
196 bool LoopDataPrefetch::run() {
197  // If PrefetchDistance is not set, don't run the pass. This gives an
198  // opportunity for targets to run this pass for selected subtargets only
199  // (whose TTI sets PrefetchDistance).
200  if (getPrefetchDistance() == 0)
201  return false;
202  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
203 
204  bool MadeChange = false;
205 
206  for (Loop *I : *LI)
207  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
208  MadeChange |= runOnLoop(*L);
209 
210  return MadeChange;
211 }
212 
213 bool LoopDataPrefetch::runOnLoop(Loop *L) {
214  bool MadeChange = false;
215 
216  // Only prefetch in the inner-most loop
217  if (!L->empty())
218  return MadeChange;
219 
221  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
222 
223  // Calculate the number of iterations ahead to prefetch
225  for (const auto BB : L->blocks()) {
226  // If the loop already has prefetches, then assume that the user knows
227  // what they are doing and don't add any more.
228  for (auto &I : *BB)
229  if (CallInst *CI = dyn_cast<CallInst>(&I))
230  if (Function *F = CI->getCalledFunction())
232  return MadeChange;
233 
234  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
235  }
236  unsigned LoopSize = Metrics.NumInsts;
237  if (!LoopSize)
238  LoopSize = 1;
239 
240  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
241  if (!ItersAhead)
242  ItersAhead = 1;
243 
244  if (ItersAhead > getMaxPrefetchIterationsAhead())
245  return MadeChange;
246 
247  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
248  << " iterations ahead (loop size: " << LoopSize << ") in "
249  << L->getHeader()->getParent()->getName() << ": " << *L);
250 
252  for (const auto BB : L->blocks()) {
253  for (auto &I : *BB) {
254  Value *PtrValue;
255  Instruction *MemI;
256 
257  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
258  MemI = LMemI;
259  PtrValue = LMemI->getPointerOperand();
260  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
261  if (!PrefetchWrites) continue;
262  MemI = SMemI;
263  PtrValue = SMemI->getPointerOperand();
264  } else continue;
265 
266  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
267  if (PtrAddrSpace)
268  continue;
269 
270  if (L->isLoopInvariant(PtrValue))
271  continue;
272 
273  const SCEV *LSCEV = SE->getSCEV(PtrValue);
274  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
275  if (!LSCEVAddRec)
276  continue;
277 
278  // Check if the stride of the accesses is large enough to warrant a
279  // prefetch.
280  if (!isStrideLargeEnough(LSCEVAddRec))
281  continue;
282 
283  // We don't want to double prefetch individual cache lines. If this load
284  // is known to be within one cache line of some other load that has
285  // already been prefetched, then don't prefetch this one as well.
286  bool DupPref = false;
287  for (const auto &PrefLoad : PrefLoads) {
288  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
289  if (const SCEVConstant *ConstPtrDiff =
290  dyn_cast<SCEVConstant>(PtrDiff)) {
291  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
292  if (PD < (int64_t) TTI->getCacheLineSize()) {
293  DupPref = true;
294  break;
295  }
296  }
297  }
298  if (DupPref)
299  continue;
300 
301  const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
302  SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
303  LSCEVAddRec->getStepRecurrence(*SE)));
304  if (!isSafeToExpand(NextLSCEV, *SE))
305  continue;
306 
307  PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
308 
309  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
310  SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
311  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
312 
313  IRBuilder<> Builder(MemI);
314  Module *M = BB->getParent()->getParent();
315  Type *I32 = Type::getInt32Ty(BB->getContext());
317  Builder.CreateCall(
318  PrefetchFunc,
319  {PrefPtrValue,
320  ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
321  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
322  ++NumPrefetches;
323  LLVM_DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
324  << "\n");
325  ORE->emit([&]() {
326  return OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)
327  << "prefetched memory access";
328  });
329 
330  MadeChange = true;
331  }
332  }
333 
334  return MadeChange;
335 }
336 
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:168
loop data prefetch
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:939
BlockT * getHeader() const
Definition: LoopInfo.h:100
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
An instruction for storing to memory.
Definition: Instructions.h:310
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1021
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
df_iterator< T > df_end(const T &G)
Machine Trace Metrics
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
This file provides the interface for LLVM&#39;s Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:194
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool mayReadFromMemory() const
Return true if this instruction may read memory.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
bool empty() const
Definition: LoopInfo.h:146
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
OptimizationRemarkEmitter legacy analysis pass.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:964
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
#define DEBUG_TYPE
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
The optimization diagnostic interface.
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1883