33#define DEBUG_TYPE "nvptx"
53 static const unsigned MaxAggrCopySize = 128;
56 return "Lower aggregate copies/intrinsics into loops";
60char NVPTXLowerAggrCopies::ID = 0;
62bool NVPTXLowerAggrCopies::runOnFunction(
Function &
F) {
69 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
70 AAResults &
AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
79 if (
DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
83 if (
SI->getOperand(0) != LI)
91 if (LenCI->getZExtValue() >= MaxAggrCopySize) {
101 if (AggrLoads.
size() == 0 && MemCalls.
size() == 0) {
110 Value *SrcAddr = LI->getOperand(0);
111 Value *DstAddr =
SI->getOperand(1);
112 unsigned NumLoads =
DL.getTypeStoreSize(LI->getType());
141 unsigned SrcAS = LI->getPointerAddressSpace();
142 unsigned DstAS =
SI->getPointerAddressSpace();
143 if (SrcAS != DstAS) {
146 SrcAddr = Builder.CreateAddrSpaceCast(SrcAddr, GenericPtrTy);
147 DstAddr = Builder.CreateAddrSpaceCast(DstAddr, GenericPtrTy);
150 DstAddr,
SI->getAlign(), SrcAddr, LI->getAlign(), CopyLen,
151 LI->isVolatile() ||
SI->isVolatile())));
154 SI->eraseFromParent();
155 LI->eraseFromParent();
160 bool Expanded =
true;
169 MemCall->eraseFromParent();
178 NVPTXLowerAggrCopies,
"nvptx-lower-aggr-copies",
179 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
false,
false)
183 NVPTXLowerAggrCopies,
"nvptx-lower-aggr-copies",
184 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
false,
false)
187 return new NVPTXLowerAggrCopies();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
This is the shared class of boolean and integer constants.
A parsed version of the target data layout string, and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memcpy intrinsic.
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Representation for a specific memory location.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Abstract Attribute helper functions.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionPass * createLowerAggrCopies()
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.