Go to the documentation of this file.
20 #define DEBUG_TYPE "arm-selectiondag-info"
24 cl::desc(
"Control conversion of memcpy to "
25 "Tail predicated loops (WLSTP)"),
28 "Don't convert memcpy to TP loop."),
30 "Always convert memcpy to TP loop."),
32 "Allow (may be subject to certain conditions) "
33 "conversion of memcpy to TP loop.")));
47 if (std::strncmp(TLI->getLibcallName(LC),
"__aeabi", 7) != 0)
61 AEABILibcall = AEABI_MEMCPY;
64 AEABILibcall = AEABI_MEMMOVE;
67 AEABILibcall = AEABI_MEMSET;
69 if (ConstantSrc->getZExtValue() == 0)
70 AEABILibcall = AEABI_MEMCLR;
83 AlignVariant = ALIGN8;
84 else if ((
Align & 3) == 0)
85 AlignVariant = ALIGN4;
87 AlignVariant = ALIGN1;
93 Args.push_back(Entry);
94 if (AEABILibcall == AEABI_MEMCLR) {
96 Args.push_back(Entry);
97 }
else if (AEABILibcall == AEABI_MEMSET) {
102 Args.push_back(Entry);
105 if (Src.getValueType().bitsGT(
MVT::i32))
107 else if (Src.getValueType().bitsLT(
MVT::i32))
112 Entry.IsSExt =
false;
113 Args.push_back(Entry);
116 Args.push_back(Entry);
119 Args.push_back(Entry);
122 char const *FunctionNames[4][3] = {
123 {
"__aeabi_memcpy",
"__aeabi_memcpy4",
"__aeabi_memcpy8" },
124 {
"__aeabi_memmove",
"__aeabi_memmove4",
"__aeabi_memmove8" },
125 {
"__aeabi_memset",
"__aeabi_memset4",
"__aeabi_memset8" },
126 {
"__aeabi_memclr",
"__aeabi_memclr4",
"__aeabi_memclr8" }
137 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
139 return CallResult.second;
145 Align Alignment,
bool IsMemcpy) {
153 if (
F.hasOptNone() ||
F.hasOptSize())
159 if (!ConstantSize && Alignment >=
Align(4))
171 SDValue Size,
Align Alignment,
bool isVolatile,
bool AlwaysInline,
177 if (Subtarget.hasMVEIntegerOps() &&
184 if (Alignment <
Align(4))
191 uint64_t SizeVal = ConstantSize->getZExtValue();
196 unsigned BytesLeft = SizeVal & 3;
197 unsigned NumMemOps = SizeVal >> 2;
198 unsigned EmittedNumMemOps = 0;
203 const unsigned MaxLoadsInLDM = Subtarget.
isThumb1Only() ? 4 : 6;
217 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
221 if (NumMEMCPYs > 1 && Subtarget.
hasMinSize()) {
227 for (
unsigned I = 0;
I != NumMEMCPYs; ++
I) {
230 unsigned NextEmittedNumMemOps = NumMemOps * (
I + 1) / NumMEMCPYs;
231 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
235 Src = Dst.getValue(1);
236 Chain = Dst.getValue(2);
241 EmittedNumMemOps = NextEmittedNumMemOps;
248 auto getRemainingValueType = [](
unsigned BytesLeft) {
251 auto getRemainingSize = [](
unsigned BytesLeft) {
252 return (BytesLeft >= 2) ? 2 : 1;
255 unsigned BytesLeftSave = BytesLeft;
258 VT = getRemainingValueType(BytesLeft);
259 VTSize = getRemainingSize(BytesLeft);
260 Loads[
i] = DAG.
getLoad(VT, dl, Chain,
273 BytesLeft = BytesLeftSave;
275 VT = getRemainingValueType(BytesLeft);
276 VTSize = getRemainingSize(BytesLeft);
294 Alignment.value(), RTLIB::MEMMOVE);
308 if (Subtarget.hasMVEIntegerOps() &&
318 Alignment.value(), RTLIB::MEMSET);
This is an optimization pass for GlobalISel generic memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
CallLoweringInfo & setChain(SDValue InChain)
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
const ARMTargetLowering * getTargetLowering() const override
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or truncating it.
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVMContext * getContext() const
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands, and they produce a value AND a token chain.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation suitable for instruction selection.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This struct is a compact representation of a valid (non-zero power of two) alignment.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
bool isThumb1Only() const
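This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.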
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
initializer< Ty > init(const Ty &Val)
SDValue getValue(unsigned R) const
This structure contains all information that is necessary for lowering calls.
uint64_t getZExtValue() const
MachinePointerInfo getWithOffset(int64_t O) const
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::vector< ArgListEntry > ArgListTy
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Function & getFunction()
Return the LLVM function that this machine code represents.
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
CallLoweringInfo & setDiscardResult(bool Value=true)
const DataLayout & getDataLayout() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline a llvm.memcpy as a Tail Predicated loop.
@ ADD
Simple integer binary arithmetic operators.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
static Type * getVoidTy(LLVMContext &C)
MachineFunction & getMachineFunction() const
SDValue getExternalSymbol(const char *Sym, EVT VT)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.