33#define DEBUG_TYPE "loop-data-prefetch"
41 cl::desc(
"Prefetch write addresses"));
45 cl::desc(
"Number of instructions to prefetch ahead"),
53 "max-prefetch-iters-ahead",
56STATISTIC(NumPrefetches,
"Number of prefetches inserted");
61class LoopDataPrefetch {
66 : AC(AC), DT(DT), LI(LI), SE(SE),
TTI(
TTI), ORE(ORE) {}
71 bool runOnLoop(
Loop *L);
75 bool isStrideLargeEnough(
const SCEVAddRecExpr *AR,
unsigned TargetMinStride);
77 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
78 unsigned NumStridedMemAccesses,
79 unsigned NumPrefetches,
84 NumPrefetches, HasCall);
87 unsigned getPrefetchDistance() {
93 unsigned getMaxPrefetchIterationsAhead() {
99 bool doPrefetchWrites() {
139char LoopDataPrefetchLegacyPass::ID = 0;
141 "Loop Data Prefetch",
false,
false)
152 return new LoopDataPrefetchLegacyPass();
155bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR,
156 unsigned TargetMinStride) {
158 if (TargetMinStride <= 1)
167 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
168 return TargetMinStride <= AbsStride;
181 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
182 bool Changed = LDP.run();
194bool LoopDataPrefetchLegacyPass::runOnFunction(
Function &
F) {
198 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
199 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
200 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
204 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
206 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
208 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
212bool LoopDataPrefetch::run() {
217 LLVM_DEBUG(
dbgs() <<
"Please set both PrefetchDistance and CacheLineSize "
218 "for loop data prefetch.\n");
222 bool MadeChange =
false;
226 MadeChange |= runOnLoop(L);
253 int64_t PtrDiff = 0) {
261 if (PrefBB != InsBB) {
267 if (isa<StoreInst>(
I) && PtrDiff == 0)
273bool LoopDataPrefetch::runOnLoop(
Loop *L) {
274 bool MadeChange =
false;
277 if (!
L->isInnermost())
285 bool HasCall =
false;
286 for (
const auto BB :
L->blocks()) {
289 for (
auto &
I : *BB) {
290 if (isa<CallInst>(&
I) || isa<InvokeInst>(&
I)) {
292 if (
F->getIntrinsicID() == Intrinsic::prefetch)
301 Metrics.analyzeBasicBlock(BB, *
TTI, EphValues);
304 if (!
Metrics.NumInsts.isValid())
307 unsigned LoopSize = *
Metrics.NumInsts.getValue();
311 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
315 if (ItersAhead > getMaxPrefetchIterationsAhead())
319 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
322 unsigned NumMemAccesses = 0;
323 unsigned NumStridedMemAccesses = 0;
325 for (
const auto BB :
L->blocks())
326 for (
auto &
I : *BB) {
330 if (
LoadInst *LMemI = dyn_cast<LoadInst>(&
I)) {
332 PtrValue = LMemI->getPointerOperand();
333 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(&
I)) {
334 if (!doPrefetchWrites())
continue;
336 PtrValue = SMemI->getPointerOperand();
343 if (
L->isLoopInvariant(PtrValue))
347 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
350 NumStridedMemAccesses++;
355 bool DupPref =
false;
356 for (
auto &Pref : Prefetches) {
359 dyn_cast<SCEVConstant>(PtrDiff)) {
360 int64_t
PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
362 Pref.addInstruction(MemI, DT, PD);
369 Prefetches.push_back(
Prefetch(LSCEVAddRec, MemI));
372 unsigned TargetMinStride =
373 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
374 Prefetches.size(), HasCall);
377 <<
" iterations ahead (loop size: " << LoopSize <<
") in "
378 <<
L->getHeader()->getParent()->getName() <<
": " << *L);
380 << NumMemAccesses <<
" memory accesses, "
381 << NumStridedMemAccesses <<
" strided memory accesses, "
382 << Prefetches.size() <<
" potential prefetch(es), "
383 <<
"a minimum stride of " << TargetMinStride <<
", "
384 << (HasCall ?
"calls" :
"no calls") <<
".\n");
386 for (
auto &
P : Prefetches) {
389 if (!isStrideLargeEnough(
P.LSCEVAddRec, TargetMinStride))
396 P.LSCEVAddRec->getStepRecurrence(*SE)));
397 if (!SCEVE.isSafeToExpand(NextLSCEV))
402 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr,
P.InsertPt);
406 Builder.CreateIntrinsic(Intrinsic::prefetch, PrefPtrValue->
getType(),
407 {PrefPtrValue, ConstantInt::get(I32, P.Writes),
408 ConstantInt::get(I32, 3),
409 ConstantInt::get(I32, 1)});
412 << *
P.MemI->getOperand(isa<LoadInst>(
P.MemI) ? 0 : 1)
413 <<
", SCEV: " << *
P.LSCEVAddRec <<
"\n");
416 <<
"prefetched memory access";
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
SmallVector< uint32_t, 0 > Writes
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This file provides the interface for LLVM's Loop Data Prefetching Pass.
static const Function * getCalledFunction(const Value *V)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
FunctionPass * createLoopDataPrefetchPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< df_iterator< T > > depth_first(const T &G)
A record for a potential prefetch made during the initial scan of the loop.
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).