LLVM  4.0.0
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a Loop Data Prefetching Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
16 #define DEBUG_TYPE "loop-data-prefetch"
18 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/IR/CFG.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Module.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Transforms/Scalar.h"
41 using namespace llvm;
42 
43 // Command-line knobs for the loop data prefetch pass. Store addresses are
44 // only prefetched when -loop-prefetch-writes is given; it defaults to false.
45 static cl::opt<bool>
46 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
47  cl::desc("Prefetch write addresses"));
48 
 // The unsigned options below carry no cl::init. The LoopDataPrefetch getters
 // test getNumOccurrences() and use the TargetTransformInfo value when an
 // option was not given on the command line.
49 static cl::opt<unsigned>
50  PrefetchDistance("prefetch-distance",
51  cl::desc("Number of instructions to prefetch ahead"),
52  cl::Hidden);
53 
54 static cl::opt<unsigned>
55  MinPrefetchStride("min-prefetch-stride",
56  cl::desc("Min stride to add prefetches"), cl::Hidden);
57 
 // NOTE(review): the first line of this declaration (listing line 58) is not
 // part of this extract; it is referred to elsewhere in the file as
 // MaxPrefetchIterationsAhead.
59  "max-prefetch-iters-ahead",
60  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
61 
 // Counter incremented once for every prefetch call that runOnLoop inserts.
62 STATISTIC(NumPrefetches, "Number of prefetches inserted");
63 
64 namespace {
65 
66 /// Loop prefetch implementation class.
 ///
 /// Holds the analysis handles handed to the constructor and performs the
 /// transformation; both the legacy and the new pass-manager entry points
 /// build one of these and call run().
 ///
 /// NOTE(review): this listing omits a few source lines of the class (the
 /// last constructor parameter, one return statement and the ORE member), so
 /// ORE appears below without a visible declaration.
67 class LoopDataPrefetch {
68 public:
69  LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE,
70  const TargetTransformInfo *TTI,
72  : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
73 
 /// Visit every loop reachable from LI and try to insert prefetches.
 /// \returns true if runOnLoop reported a change for at least one loop.
74  bool run();
75 
76 private:
 /// Try to insert prefetches into the single loop \p L.
 /// \returns true if at least one prefetch call was inserted.
77  bool runOnLoop(Loop *L);
78 
79  /// \brief Check if the stride of the accesses is large enough to
80  /// warrant a prefetch.
81  bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
82 
 /// Minimum stride: the -min-prefetch-stride value when the option was
 /// given on the command line, otherwise the target's value from TTI.
83  unsigned getMinPrefetchStride() {
84  if (MinPrefetchStride.getNumOccurrences() > 0)
85  return MinPrefetchStride;
86  return TTI->getMinPrefetchStride();
87  }
88 
 /// Prefetch distance: the -prefetch-distance value when the option was
 /// given on the command line, otherwise the target's value from TTI.
 /// run() treats a result of 0 as "do not run".
89  unsigned getPrefetchDistance() {
90  if (PrefetchDistance.getNumOccurrences() > 0)
91  return PrefetchDistance;
92  return TTI->getPrefetchDistance();
93  }
94 
 /// Upper bound on how many iterations ahead runOnLoop may prefetch.
 /// NOTE(review): the statement guarded by the if below (listing line 97)
 /// is missing from this extract; presumably it returns the command-line
 /// value like the two getters above -- confirm against the real source.
95  unsigned getMaxPrefetchIterationsAhead() {
96  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
98  return TTI->getMaxPrefetchIterationsAhead();
99  }
100 
 // Analysis handles stored by the constructor.
101  AssumptionCache *AC; // passed to CodeMetrics::collectEphemeralValues
102  LoopInfo *LI; // source of the loops iterated by run()
103  ScalarEvolution *SE; // used to build and compare address SCEVs
104  const TargetTransformInfo *TTI; // target prefetch parameters, cache line size
106 };
107 
108 /// Legacy class for inserting loop data prefetches.
 ///
 /// A FunctionPass wrapper: runOnFunction gathers the analyses, constructs
 /// a LoopDataPrefetch object and returns the result of its run().
109 class LoopDataPrefetchLegacyPass : public FunctionPass {
110 public:
111  static char ID; // Pass ID, replacement for typeid
 // NOTE(review): the constructor body (listing line 113) is not part of
 // this extract.
112  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
114  }
115 
 // NOTE(review): the statements that declare required and preserved analyses
 // (listing lines 117-122 and 126) are not part of this extract; only the
 // FIXME about ScalarEvolution survives.
116  void getAnalysisUsage(AnalysisUsage &AU) const override {
123  // FIXME: For some reason, preserving SE here breaks LSR (even if
124  // this pass changes nothing).
125  // AU.addPreserved<ScalarEvolutionWrapperPass>();
127  }
128 
 // Defined out of line further down in this file.
129  bool runOnFunction(Function &F) override;
130  };
131 }
132 
 // Register LoopDataPrefetchLegacyPass under the argument string
 // "loop-data-prefetch" with the display name "Loop Data Prefetch".
 // NOTE(review): the lines between BEGIN and END (listing lines 136-140,
 // presumably the INITIALIZE_PASS_DEPENDENCY entries) are not part of this
 // extract.
134 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
135  "Loop Data Prefetch", false, false)
141 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
142  "Loop Data Prefetch", false, false)
143 
 // NOTE(review): the function header (listing line 144) is not part of this
 // extract. The body returns a newly allocated legacy pass object to the
 // caller.
145  return new LoopDataPrefetchLegacyPass();
146 }
147 
148 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
149  unsigned TargetMinStride = getMinPrefetchStride();
150  // No need to check if any stride goes.
151  if (TargetMinStride <= 1)
152  return true;
153 
154  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
155  // If MinStride is set, don't prefetch unless we can ensure that stride is
156  // larger.
157  if (!ConstStride)
158  return false;
159 
160  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
161  return TargetMinStride <= AbsStride;
162 }
163 
 // New pass-manager entry point (body only).
 // NOTE(review): the function header and the declarations of SE, AC, ORE
 // and PA (listing lines 165, 167-170, 177-178) are not part of this
 // extract, so those names appear below without a visible definition.
166  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
171  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
172 
 // Delegate the actual work to the shared implementation class.
173  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
174  bool Changed = LDP.run();
175 
 // When the IR changed, return the PA set with LoopAnalysis marked as
 // preserved; otherwise report that every analysis is preserved.
176  if (Changed) {
179  PA.preserve<LoopAnalysis>();
180  return PA;
181  }
182 
183  return PreservedAnalyses::all();
184 }
185 
 // Legacy pass-manager entry point: collect the analyses for \p F, run the
 // shared LoopDataPrefetch implementation and return whether it changed
 // anything. Returns false without doing any work when skipFunction(F) says
 // the function must be skipped.
186 bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
187  if (skipFunction(F))
188  return false;
189 
190  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
191  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
192  AssumptionCache *AC =
193  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 // NOTE(review): the left-hand side of the next initializer (listing line
 // 194, presumably the declaration of ORE) is not part of this extract.
195  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
196  const TargetTransformInfo *TTI =
197  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
198 
199  LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
200  return LDP.run();
201 }
202 
203 bool LoopDataPrefetch::run() {
204  // If PrefetchDistance is not set, don't run the pass. This gives an
205  // opportunity for targets to run this pass for selected subtargets only
206  // (whose TTI sets PrefetchDistance).
207  if (getPrefetchDistance() == 0)
208  return false;
209  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
210 
211  bool MadeChange = false;
212 
213  for (Loop *I : *LI)
214  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
215  MadeChange |= runOnLoop(*L);
216 
217  return MadeChange;
218 }
219 
 // Insert prefetch calls ahead of strided loads (and, when PrefetchWrites is
 // set, stores) in the loop \p L.
 //
 // Returns true if at least one prefetch call was inserted. Returns false
 // without changing anything when \p L contains sub-loops, when the early
 // exit in the first block scan triggers, or when the computed look-ahead
 // exceeds getMaxPrefetchIterationsAhead().
 //
 // NOTE(review): this listing omits several source lines of the function
 // (listing lines 227, 231, 238, 258 and 323), so EphValues, Metrics,
 // PrefLoads and PrefetchFunc are used below without a visible declaration.
220 bool LoopDataPrefetch::runOnLoop(Loop *L) {
221  bool MadeChange = false;
222 
223  // Only prefetch in the inner-most loop
224  if (!L->empty())
225  return MadeChange;
226 
228  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
229 
230  // Calculate the number of iterations ahead to prefetch
232  for (const auto BB : L->blocks()) {
233  // If the loop already has prefetches, then assume that the user knows
234  // what they are doing and don't add any more.
 // NOTE(review): the innermost condition guarding the return (listing line
 // 238) is missing here; presumably it tests whether the callee is the
 // prefetch intrinsic -- confirm against the real source.
235  for (auto &I : *BB)
236  if (CallInst *CI = dyn_cast<CallInst>(&I))
237  if (Function *F = CI->getCalledFunction())
239  return MadeChange;
240 
241  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
242  }
 // Use the instruction count of the loop body as its size, treating an
 // empty count as 1 so the division below is well defined.
243  unsigned LoopSize = Metrics.NumInsts;
244  if (!LoopSize)
245  LoopSize = 1;
246 
 // Look-ahead in iterations = prefetch distance / loop size, at least 1.
247  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
248  if (!ItersAhead)
249  ItersAhead = 1;
250 
251  if (ItersAhead > getMaxPrefetchIterationsAhead())
252  return MadeChange;
253 
254  DEBUG(dbgs() << "Prefetching " << ItersAhead
255  << " iterations ahead (loop size: " << LoopSize << ") in "
256  << L->getHeader()->getParent()->getName() << ": " << *L);
257 
259  for (const auto BB : L->blocks()) {
260  for (auto &I : *BB) {
261  Value *PtrValue;
262  Instruction *MemI;
263 
 // Only loads, and stores when PrefetchWrites is enabled, are candidates.
264  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
265  MemI = LMemI;
266  PtrValue = LMemI->getPointerOperand();
267  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
268  if (!PrefetchWrites) continue;
269  MemI = SMemI;
270  PtrValue = SMemI->getPointerOperand();
271  } else continue;
272 
 // Skip accesses whose pointer is not in address space 0.
273  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
274  if (PtrAddrSpace)
275  continue;
276 
 // A loop-invariant address does not advance, so it is skipped.
277  if (L->isLoopInvariant(PtrValue))
278  continue;
279 
 // The address must be an add-recurrence so that a per-iteration step exists.
280  const SCEV *LSCEV = SE->getSCEV(PtrValue);
281  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
282  if (!LSCEVAddRec)
283  continue;
284 
285  // Check if the stride of the accesses is large enough to warrant a
286  // prefetch.
287  if (!isStrideLargeEnough(LSCEVAddRec))
288  continue;
289 
290  // We don't want to double prefetch individual cache lines. If this load
291  // is known to be within one cache line of some other load that has
292  // already been prefetched, then don't prefetch this one as well.
293  bool DupPref = false;
294  for (const auto &PrefLoad : PrefLoads) {
295  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
296  if (const SCEVConstant *ConstPtrDiff =
297  dyn_cast<SCEVConstant>(PtrDiff)) {
298  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
299  if (PD < (int64_t) TTI->getCacheLineSize()) {
300  DupPref = true;
301  break;
302  }
303  }
304  }
305  if (DupPref)
306  continue;
307 
 // Address to prefetch: the recurrence plus ItersAhead times its step.
308  const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
309  SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
310  LSCEVAddRec->getStepRecurrence(*SE)));
311  if (!isSafeToExpand(NextLSCEV, *SE))
312  continue;
313 
 // Remember this access so later ones near it are treated as duplicates.
314  PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
315 
 // Materialize the look-ahead address as an i8 pointer just before MemI.
316  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
317  SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
318  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
319 
320  IRBuilder<> Builder(MemI);
321  Module *M = BB->getParent()->getParent();
322  Type *I32 = Type::getInt32Ty(BB->getContext());
 // Emit the call before MemI. The second operand is 0 when MemI may read
 // memory and 1 otherwise; the last two operands are the constants 3 and 1.
 // NOTE(review): PrefetchFunc's definition (listing line 323) is missing;
 // presumably it is the llvm.prefetch declaration, whose operands would be
 // (address, rw, locality, cache type) -- confirm.
324  Builder.CreateCall(
325  PrefetchFunc,
326  {PrefPtrValue,
327  ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
328  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
329  ++NumPrefetches;
330  DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
331  << "\n");
332  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)
333  << "prefetched memory access");
334 
335  MadeChange = true;
336  }
337  }
338 
339  return MadeChange;
340 }
341 
MachineLoop * L
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
const SCEV * getConstant(ConstantInt *V)
STATISTIC(NumFunctions,"Total number of functions")
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:189
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
An instruction for reading from memory.
Definition: Instructions.h:164
loop data prefetch
BlockT * getHeader() const
Definition: LoopInfo.h:102
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
This is the interface for a SCEV-based alias analysis.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:55
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:806
loop data Loop Data false
#define F(x, y, z)
Definition: MD5.cpp:51
bool mayReadFromMemory() const
Return true if this instruction may read memory.
This node represents a polynomial recurrence on the trip count of the specified loop.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:949
An instruction for storing to memory.
Definition: Instructions.h:300
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:143
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
df_iterator< T > df_end(const T &G)
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass,"loop-data-prefetch","Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:213
This file provides the interface for LLVM's Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
machine trace Machine Trace Metrics
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
void emit(DiagnosticInfoOptimizationBase &OptDiag)
The new interface to emit remarks.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:146
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical add expression, or something simpler if possible.
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
#define I(x, y, z)
Definition: MD5.cpp:54
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1099
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:120
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues)
Add information about a block to the current state.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:71
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
OptimizationRemarkEmitter legacy analysis pass.
bool empty() const
Definition: LoopInfo.h:136
#define DEBUG(X)
Definition: Debug.h:100
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:831
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
This pass exposes codegen information to IR-level passes.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
#define DEBUG_TYPE
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:479
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical multiply expression, or something simpler if possible.
The optimization diagnostic interface.
This class represents a constant integer value.