18 if (
VectorType *VTy = dyn_cast<VectorType>(Type)) {
19 return VTy->getBitWidth() / 8;
27 unsigned SrcAlign,
unsigned DestAlign,
28 bool SrcIsVolatile,
bool DstIsVolatile,
44 uint64_t LoopEndCount = CopyLen->
getZExtValue() / LoopOpSize;
46 unsigned SrcAS = cast<PointerType>(SrcAddr->
getType())->getAddressSpace();
47 unsigned DstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
49 if (LoopEndCount != 0) {
62 if (SrcAddr->
getType() != SrcOpType) {
63 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
65 if (DstAddr->
getType() != DstOpType) {
66 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
78 LoopBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
90 uint64_t BytesCopied = LoopEndCount * LoopOpSize;
91 uint64_t RemainingBytes = CopyLen->
getZExtValue() - BytesCopied;
97 SrcAlign = std::min(SrcAlign, LoopOpSize);
98 DestAlign = std::min(DestAlign, LoopOpSize);
102 SrcAlign, DestAlign);
104 for (
auto OpTy : RemainingOps) {
107 uint64_t GepIndex = BytesCopied / OperandSize;
108 assert(GepIndex * OperandSize == BytesCopied &&
109 "Division should have no Remainder!");
114 : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
115 Value *SrcGEP = RBuilder.CreateInBoundsGEP(
117 Value *
Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
123 : RBuilder.CreateBitCast(DstAddr, DstPtrType);
124 Value *DstGEP = RBuilder.CreateInBoundsGEP(
126 RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
128 BytesCopied += OperandSize;
132 "Bytes copied should match size in the call!");
137 Value *CopyLen,
unsigned SrcAlign,
138 unsigned DestAlign,
bool SrcIsVolatile,
143 PreLoopBB->
splitBasicBlock(InsertBefore,
"post-loop-memcpy-expansion");
154 unsigned SrcAS = cast<PointerType>(SrcAddr->
getType())->getAddressSpace();
155 unsigned DstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
158 if (SrcAddr->
getType() != SrcOpType) {
159 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
161 if (DstAddr->
getType() != DstOpType) {
162 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
169 "expected size argument to memcpy to be an integer type!");
171 bool LoopOpIsInt8 = LoopOpType == Int8Type;
173 Value *RuntimeLoopCount = LoopOpIsInt8 ?
175 PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
186 LoopBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
194 Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
195 Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
203 Ctx,
"loop-memcpy-residual-header", PreLoopBB->
getParent(),
nullptr);
211 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
212 LoopBB, ResHeaderBB);
216 LoopBuilder.
CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
222 ResLoopBB, PostLoopBB);
227 ResBuilder.
CreatePHI(CopyLenType, 2,
"residual-loop-index");
234 Value *FullOffset = ResBuilder.
CreateAdd(RuntimeBytesCopied, ResidualIndex);
240 ResBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
244 ResidualIndex->
addIncoming(ResNewIndex, ResLoopBB);
248 ResBuilder.
CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
256 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
260 LoopBuilder.
CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
289 unsigned SrcAlign,
unsigned DestAlign,
290 bool SrcIsVolatile,
bool DstIsVolatile) {
295 Type *EltTy = cast<PointerType>(SrcAddr->
getType())->getElementType();
304 SrcAddr, DstAddr,
"compare_src_dst");
315 CopyBackwardsBB->
setName(
"copy_backwards");
317 CopyForwardBB->
setName(
"copy_forward");
319 ExitBB->
setName(
"memmove_done");
331 PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
332 Value *IndexPtr = LoopBuilder.CreateSub(
334 Value *Element = LoopBuilder.CreateLoad(
335 EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
337 LoopBuilder.CreateStore(
338 Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
339 LoopBuilder.CreateCondBr(
351 PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0,
"index_ptr");
352 Value *FwdElement = FwdLoopBuilder.CreateLoad(
353 EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
355 FwdLoopBuilder.CreateStore(
356 FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
357 Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
359 FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
382 unsigned dstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
383 DstAddr = Builder.CreateBitCast(DstAddr,
386 Builder.CreateCondBr(
static unsigned getLoopOperandSizeInBytes(Type *Type)
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
This class represents lattice values for constants.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
This class wraps the llvm.memset intrinsic.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Value * getLength() const
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
LLVMContext & getContext() const
Get the context in which this basic block lives.
This class wraps the llvm.memmove intrinsic.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
void setName(const Twine &Name)
Change the name of the value.
unsigned getDestAlignment() const
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Type * getType() const
All values are typed, get the type of this value.
Class to represent pointers.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
This is an important base class in LLVM.
This instruction compares its operands according to the predicate given to the constructor.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Class to represent integer types.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
This is the shared class of boolean and integer constants.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
This class wraps the llvm.memcpy intrinsic.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Value * getRawSource() const
Return the arguments to the instruction.
Class to represent vector types.
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, unsigned Align, bool IsVolatile)
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
const Function * getParent() const
Return the enclosing method, or null if none.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant...
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile)
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant...
static IntegerType * getInt8Ty(LLVMContext &C)
Value * getRawDest() const
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI)
Expand MemCpy as a loop. MemCpy is not deleted.
const BasicBlock * getParent() const