LLVM 20.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
16using namespace llvm;
17
18#define DEBUG_TYPE "arm-selectiondag-info"
19
21 "arm-memtransfer-tploop", cl::Hidden,
22 cl::desc("Control conversion of memcpy to "
23 "Tail predicated loops (WLSTP)"),
26 "Don't convert memcpy to TP loop."),
27 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
28 "Always convert memcpy to TP loop."),
30 "Allow (may be subject to certain conditions) "
31 "conversion of memcpy to TP loop.")));
32
33bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
34 return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
36}
37
38// Emit, if possible, a specialized version of the given Libcall. Typically this
39// means selecting the appropriately aligned version, but we also convert memset
40// of 0 into memclr.
42 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
43 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
44 const ARMSubtarget &Subtarget =
46 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
47
48 // Only use a specialized AEABI function if the default version of this
49 // Libcall is an AEABI function.
50 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
51 return SDValue();
52
53 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
54 // able to translate memset to memclr and use the value to index the function
55 // name array.
56 enum {
57 AEABI_MEMCPY = 0,
58 AEABI_MEMMOVE,
59 AEABI_MEMSET,
60 AEABI_MEMCLR
61 } AEABILibcall;
62 switch (LC) {
63 case RTLIB::MEMCPY:
64 AEABILibcall = AEABI_MEMCPY;
65 break;
66 case RTLIB::MEMMOVE:
67 AEABILibcall = AEABI_MEMMOVE;
68 break;
69 case RTLIB::MEMSET:
70 AEABILibcall = AEABI_MEMSET;
71 if (isNullConstant(Src))
72 AEABILibcall = AEABI_MEMCLR;
73 break;
74 default:
75 return SDValue();
76 }
77
78 // Choose the most-aligned libcall variant that we can
79 enum {
80 ALIGN1 = 0,
81 ALIGN4,
82 ALIGN8
83 } AlignVariant;
84 if ((Align & 7) == 0)
85 AlignVariant = ALIGN8;
86 else if ((Align & 3) == 0)
87 AlignVariant = ALIGN4;
88 else
89 AlignVariant = ALIGN1;
90
93 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
94 Entry.Node = Dst;
95 Args.push_back(Entry);
96 if (AEABILibcall == AEABI_MEMCLR) {
97 Entry.Node = Size;
98 Args.push_back(Entry);
99 } else if (AEABILibcall == AEABI_MEMSET) {
100 // Adjust parameters for memset: EABI uses the format (ptr, size, value),
101 // whereas the GNU library uses (ptr, value, size).
102 // See RTABI section 4.3.4
103 Entry.Node = Size;
104 Args.push_back(Entry);
105
106 // Extend or truncate the argument to be an i32 value for the call.
107 if (Src.getValueType().bitsGT(MVT::i32))
108 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
109 else if (Src.getValueType().bitsLT(MVT::i32))
110 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
111
112 Entry.Node = Src;
113 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
114 Entry.IsSExt = false;
115 Args.push_back(Entry);
116 } else {
117 Entry.Node = Src;
118 Args.push_back(Entry);
119
120 Entry.Node = Size;
121 Args.push_back(Entry);
122 }
123
124 char const *FunctionNames[4][3] = {
125 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
126 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
127 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
128 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
129 };
131 CLI.setDebugLoc(dl)
132 .setChain(Chain)
134 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
135 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
136 TLI->getPointerTy(DAG.getDataLayout())),
137 std::move(Args))
139 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
140
141 return CallResult.second;
142}
143
144static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
145 const SelectionDAG &DAG,
146 ConstantSDNode *ConstantSize,
147 Align Alignment, bool IsMemcpy) {
148 auto &F = DAG.getMachineFunction().getFunction();
150 return false;
152 return true;
153 // Do not generate inline TP loop if optimizations are disabled,
154 // or if optimization for size (-Os or -Oz) is on.
155 if (F.hasOptNone() || F.hasOptSize())
156 return false;
157 // If cli option is unset, for memset always generate inline TP.
158 // For memcpy, check some conditions
159 if (!IsMemcpy)
160 return true;
161 if (!ConstantSize && Alignment >= Align(4))
162 return true;
163 if (ConstantSize &&
164 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
165 ConstantSize->getZExtValue() <
167 return true;
168 return false;
169}
170
172 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
173 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
174 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
175 const ARMSubtarget &Subtarget =
177 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
178
179 if (Subtarget.hasMVEIntegerOps() &&
180 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
181 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
182 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
183
184 // Do repeated 4-byte loads and stores. To be improved.
185 // This requires 4-byte alignment.
186 if (Alignment < Align(4))
187 return SDValue();
188 // This requires the copy size to be a constant, preferably
189 // within a subtarget-specific limit.
190 if (!ConstantSize)
191 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
192 Alignment.value(), RTLIB::MEMCPY);
193 uint64_t SizeVal = ConstantSize->getZExtValue();
194 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
195 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
196 Alignment.value(), RTLIB::MEMCPY);
197
198 unsigned BytesLeft = SizeVal & 3;
199 unsigned NumMemOps = SizeVal >> 2;
200 unsigned EmittedNumMemOps = 0;
201 EVT VT = MVT::i32;
202 unsigned VTSize = 4;
203 unsigned i = 0;
204 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
205 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
206 SDValue TFOps[6];
207 SDValue Loads[6];
208 uint64_t SrcOff = 0, DstOff = 0;
209
210 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
211 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
212 // pressure on the general purpose registers. However this seems harder to map
213 // onto the register allocator's view of the world.
214
215 // The number of MEMCPY pseudo-instructions to emit. We use up to
216 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
217 // later on. This is a lower bound on the number of MEMCPY operations we must
218 // emit.
219 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
220
221 // Code size optimisation: do not inline memcpy if expansion results in
222 // more instructions than the library call.
223 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
224 return SDValue();
225 }
226
227 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
228
229 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
230 // Evenly distribute registers among MEMCPY operations to reduce register
231 // pressure.
232 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
233 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
234
235 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
236 DAG.getConstant(NumRegs, dl, MVT::i32));
237 Src = Dst.getValue(1);
238 Chain = Dst.getValue(2);
239
240 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
241 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
242
243 EmittedNumMemOps = NextEmittedNumMemOps;
244 }
245
246 if (BytesLeft == 0)
247 return Chain;
248
249 // Issue loads / stores for the trailing (1 - 3) bytes.
250 auto getRemainingValueType = [](unsigned BytesLeft) {
251 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
252 };
253 auto getRemainingSize = [](unsigned BytesLeft) {
254 return (BytesLeft >= 2) ? 2 : 1;
255 };
256
257 unsigned BytesLeftSave = BytesLeft;
258 i = 0;
259 while (BytesLeft) {
260 VT = getRemainingValueType(BytesLeft);
261 VTSize = getRemainingSize(BytesLeft);
262 Loads[i] = DAG.getLoad(VT, dl, Chain,
263 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
264 DAG.getConstant(SrcOff, dl, MVT::i32)),
265 SrcPtrInfo.getWithOffset(SrcOff));
266 TFOps[i] = Loads[i].getValue(1);
267 ++i;
268 SrcOff += VTSize;
269 BytesLeft -= VTSize;
270 }
271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
272
273 i = 0;
274 BytesLeft = BytesLeftSave;
275 while (BytesLeft) {
276 VT = getRemainingValueType(BytesLeft);
277 VTSize = getRemainingSize(BytesLeft);
278 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
279 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
280 DAG.getConstant(DstOff, dl, MVT::i32)),
281 DstPtrInfo.getWithOffset(DstOff));
282 ++i;
283 DstOff += VTSize;
284 BytesLeft -= VTSize;
285 }
286 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
287}
288
290 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
291 SDValue Size, Align Alignment, bool isVolatile,
292 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
293 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
294 Alignment.value(), RTLIB::MEMMOVE);
295}
296
298 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
299 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
300 MachinePointerInfo DstPtrInfo) const {
301
302 const ARMSubtarget &Subtarget =
304
305 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
306
307 // Generate TP loop for llvm.memset
308 if (Subtarget.hasMVEIntegerOps() &&
309 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
310 false)) {
311 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
312 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
313 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
314 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
315 }
316
317 if (!AlwaysInline)
318 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
319 Alignment.value(), RTLIB::MEMSET);
320
321 return SDValue();
322}
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
bool isTargetMemoryOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has a memory operand.
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
Definition: ARMSubtarget.h:224
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:216
bool hasMinSize() const
Definition: ARMSubtarget.h:402
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
std::vector< ArgListEntry > ArgListTy
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)