34#define DEBUG_TYPE "loop-data-prefetch"
42 cl::desc(
"Prefetch write addresses"));
46 cl::desc(
"Number of instructions to prefetch ahead"),
54 "max-prefetch-iters-ahead",
57STATISTIC(NumPrefetches,
"Number of prefetches inserted");
62class LoopDataPrefetch {
67 : AC(AC), DT(DT), LI(LI), SE(SE),
TTI(
TTI), ORE(ORE) {}
72 bool runOnLoop(
Loop *L);
76 bool isStrideLargeEnough(
const SCEVAddRecExpr *AR,
unsigned TargetMinStride);
78 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
79 unsigned NumStridedMemAccesses,
80 unsigned NumPrefetches,
85 NumPrefetches, HasCall);
88 unsigned getPrefetchDistance() {
94 unsigned getMaxPrefetchIterationsAhead() {
100 bool doPrefetchWrites() {
140char LoopDataPrefetchLegacyPass::ID = 0;
142 "Loop Data Prefetch",
false,
false)
153 return new LoopDataPrefetchLegacyPass();
156bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR,
157 unsigned TargetMinStride) {
159 if (TargetMinStride <= 1)
168 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
169 return TargetMinStride <= AbsStride;
182 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
183 bool Changed = LDP.run();
195bool LoopDataPrefetchLegacyPass::runOnFunction(
Function &
F) {
199 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
201 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
203 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
205 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
207 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
209 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
213bool LoopDataPrefetch::run() {
218 LLVM_DEBUG(
dbgs() <<
"Please set both PrefetchDistance and CacheLineSize "
219 "for loop data prefetch.\n");
223 bool MadeChange =
false;
227 MadeChange |= runOnLoop(L);
254 int64_t PtrDiff = 0) {
262 if (PrefBB != InsBB) {
268 if (isa<StoreInst>(
I) && PtrDiff == 0)
274bool LoopDataPrefetch::runOnLoop(
Loop *L) {
275 bool MadeChange =
false;
278 if (!
L->isInnermost())
286 bool HasCall =
false;
287 for (
const auto BB :
L->blocks()) {
290 for (
auto &
I : *BB) {
291 if (isa<CallInst>(&
I) || isa<InvokeInst>(&
I)) {
293 if (
F->getIntrinsicID() == Intrinsic::prefetch)
302 Metrics.analyzeBasicBlock(BB, *
TTI, EphValues);
305 if (!
Metrics.NumInsts.isValid())
308 unsigned LoopSize = *
Metrics.NumInsts.getValue();
312 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
316 if (ItersAhead > getMaxPrefetchIterationsAhead())
320 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
323 unsigned NumMemAccesses = 0;
324 unsigned NumStridedMemAccesses = 0;
326 for (
const auto BB :
L->blocks())
327 for (
auto &
I : *BB) {
331 if (
LoadInst *LMemI = dyn_cast<LoadInst>(&
I)) {
333 PtrValue = LMemI->getPointerOperand();
334 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(&
I)) {
335 if (!doPrefetchWrites())
continue;
337 PtrValue = SMemI->getPointerOperand();
344 if (
L->isLoopInvariant(PtrValue))
348 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
351 NumStridedMemAccesses++;
356 bool DupPref =
false;
357 for (
auto &Pref : Prefetches) {
360 dyn_cast<SCEVConstant>(PtrDiff)) {
361 int64_t
PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
363 Pref.addInstruction(MemI, DT, PD);
370 Prefetches.push_back(
Prefetch(LSCEVAddRec, MemI));
373 unsigned TargetMinStride =
374 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
375 Prefetches.size(), HasCall);
378 <<
" iterations ahead (loop size: " << LoopSize <<
") in "
379 <<
L->getHeader()->getParent()->getName() <<
": " << *L);
381 << NumMemAccesses <<
" memory accesses, "
382 << NumStridedMemAccesses <<
" strided memory accesses, "
383 << Prefetches.size() <<
" potential prefetch(es), "
384 <<
"a minimum stride of " << TargetMinStride <<
", "
385 << (HasCall ?
"calls" :
"no calls") <<
".\n");
387 for (
auto &
P : Prefetches) {
390 if (!isStrideLargeEnough(
P.LSCEVAddRec, TargetMinStride))
397 P.LSCEVAddRec->getStepRecurrence(*SE)));
398 if (!SCEVE.isSafeToExpand(NextLSCEV))
403 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr,
P.InsertPt);
409 M, Intrinsic::prefetch, PrefPtrValue->
getType());
413 ConstantInt::get(I32,
P.Writes),
414 ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
417 << *
P.MemI->getOperand(isa<LoadInst>(
P.MemI) ? 0 : 1)
418 <<
", SCEV: " << *
P.LSCEVAddRec <<
"\n");
421 <<
"prefetched memory access";
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
SmallVector< uint32_t, 0 > Writes
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This file provides the interface for LLVM's Loop Data Prefetching Pass.
static const Function * getCalledFunction(const Value *V)
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
FunctionPass * createLoopDataPrefetchPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< df_iterator< T > > depth_first(const T &G)
A record for a potential prefetch made during the initial scan of the loop.
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).