16 #define DEBUG_TYPE "loop-data-prefetch"
47 cl::desc(
"Prefetch write addresses"));
51 cl::desc(
"Number of instructions to prefetch ahead"),
59 "max-prefetch-iters-ahead",
62 STATISTIC(NumPrefetches,
"Number of prefetches inserted");
67 class LoopDataPrefetch {
72 : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
77 bool runOnLoop(
Loop *
L);
83 unsigned getMinPrefetchStride() {
86 return TTI->getMinPrefetchStride();
89 unsigned getPrefetchDistance() {
92 return TTI->getPrefetchDistance();
95 unsigned getMaxPrefetchIterationsAhead() {
98 return TTI->getMaxPrefetchIterationsAhead();
109 class LoopDataPrefetchLegacyPass :
public FunctionPass {
129 bool runOnFunction(
Function &
F)
override;
135 "Loop Data Prefetch",
false,
false)
145 return new LoopDataPrefetchLegacyPass();
148 bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR) {
149 unsigned TargetMinStride = getMinPrefetchStride();
151 if (TargetMinStride <= 1)
160 unsigned AbsStride =
std::abs(ConstStride->getAPInt().getSExtValue());
161 return TargetMinStride <= AbsStride;
173 LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
174 bool Changed = LDP.run();
186 bool LoopDataPrefetchLegacyPass::runOnFunction(
Function &
F) {
190 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
191 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
193 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
195 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
197 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
199 LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
203 bool LoopDataPrefetch::run() {
207 if (getPrefetchDistance() == 0)
211 bool MadeChange =
false;
215 MadeChange |= runOnLoop(*
L);
220 bool LoopDataPrefetch::runOnLoop(
Loop *
L) {
221 bool MadeChange =
false;
232 for (
const auto BB : L->
blocks()) {
236 if (
CallInst *CI = dyn_cast<CallInst>(&
I))
237 if (
Function *F = CI->getCalledFunction())
243 unsigned LoopSize = Metrics.
NumInsts;
247 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
251 if (ItersAhead > getMaxPrefetchIterationsAhead())
254 DEBUG(
dbgs() <<
"Prefetching " << ItersAhead
255 <<
" iterations ahead (loop size: " << LoopSize <<
") in "
256 << L->
getHeader()->getParent()->getName() <<
": " << *
L);
259 for (
const auto BB : L->
blocks()) {
260 for (
auto &
I : *BB) {
264 if (
LoadInst *LMemI = dyn_cast<LoadInst>(&
I)) {
266 PtrValue = LMemI->getPointerOperand();
267 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(&
I)) {
270 PtrValue = SMemI->getPointerOperand();
287 if (!isStrideLargeEnough(LSCEVAddRec))
293 bool DupPref =
false;
294 for (
const auto &PrefLoad : PrefLoads) {
297 dyn_cast<SCEVConstant>(PtrDiff)) {
298 int64_t
PD =
std::abs(ConstPtrDiff->getValue()->getSExtValue());
314 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
317 SCEVExpander SCEVE(*SE,
I.getModule()->getDataLayout(),
"prefaddr");
318 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
321 Module *M = BB->getParent()->getParent();
330 DEBUG(
dbgs() <<
" Access: " << *PtrValue <<
", SCEV: " << *LSCEV
333 <<
"prefetched memory access");
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
const SCEV * getConstant(ConstantInt *V)
STATISTIC(NumFunctions,"Total number of functions")
A Module instance is used to store all the information related to an LLVM module. ...
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Analysis pass which computes a DominatorTree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
An instruction for reading from memory.
BlockT * getHeader() const
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
This is the interface for a SCEV-based alias analysis.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Analysis pass that exposes the LoopInfo for a function.
loop data Loop Data false
bool mayReadFromMemory() const
Return true if this instruction may read memory.
This node represents a polynomial recurrence on the trip count of the specified loop.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
An instruction for storing to memory.
iterator_range< block_iterator > blocks() const
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
initializer< Ty > init(const Ty &Val)
A set of analyses that are preserved following a run of a transformation pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
The instances of the Type class are immutable: once they are created, they are never changed...
df_iterator< T > df_end(const T &G)
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass,"loop-data-prefetch","Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Represent the analysis usage information of a pass.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
This file provides the interface for LLVM's Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
machine trace Machine Trace Metrics
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical add expression, or something simpler if possible.
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Represents a single loop in the control flow graph.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void preserve()
Mark an analysis as preserved.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues)
Add information about a block to the current state.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
The legacy pass manager's analysis pass to compute loop information.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical multiply expression, or something simpler if possible.
This class represents a constant integer value.