LLVM 20.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
16using namespace llvm;
17
18#define DEBUG_TYPE "arm-selectiondag-info"
19
21 "arm-memtransfer-tploop", cl::Hidden,
22 cl::desc("Control conversion of memcpy to "
23 "Tail predicated loops (WLSTP)"),
26 "Don't convert memcpy to TP loop."),
27 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
28 "Always convert memcpy to TP loop."),
30 "Allow (may be subject to certain conditions) "
31 "conversion of memcpy to TP loop.")));
32
33// Emit, if possible, a specialized version of the given Libcall. Typically this
34// means selecting the appropriately aligned version, but we also convert memset
35// of 0 into memclr.
37 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
38 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
39 const ARMSubtarget &Subtarget =
41 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
42
43 // Only use a specialized AEABI function if the default version of this
44 // Libcall is an AEABI function.
45 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
46 return SDValue();
47
48 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
49 // able to translate memset to memclr and use the value to index the function
50 // name array.
51 enum {
52 AEABI_MEMCPY = 0,
53 AEABI_MEMMOVE,
54 AEABI_MEMSET,
55 AEABI_MEMCLR
56 } AEABILibcall;
57 switch (LC) {
58 case RTLIB::MEMCPY:
59 AEABILibcall = AEABI_MEMCPY;
60 break;
61 case RTLIB::MEMMOVE:
62 AEABILibcall = AEABI_MEMMOVE;
63 break;
64 case RTLIB::MEMSET:
65 AEABILibcall = AEABI_MEMSET;
66 if (isNullConstant(Src))
67 AEABILibcall = AEABI_MEMCLR;
68 break;
69 default:
70 return SDValue();
71 }
72
73 // Choose the most-aligned libcall variant that we can
74 enum {
75 ALIGN1 = 0,
76 ALIGN4,
77 ALIGN8
78 } AlignVariant;
79 if ((Align & 7) == 0)
80 AlignVariant = ALIGN8;
81 else if ((Align & 3) == 0)
82 AlignVariant = ALIGN4;
83 else
84 AlignVariant = ALIGN1;
85
88 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
89 Entry.Node = Dst;
90 Args.push_back(Entry);
91 if (AEABILibcall == AEABI_MEMCLR) {
92 Entry.Node = Size;
93 Args.push_back(Entry);
94 } else if (AEABILibcall == AEABI_MEMSET) {
95 // Adjust parameters for memset, EABI uses format (ptr, size, value),
96 // GNU library uses (ptr, value, size)
97 // See RTABI section 4.3.4
98 Entry.Node = Size;
99 Args.push_back(Entry);
100
101 // Extend or truncate the argument to be an i32 value for the call.
102 if (Src.getValueType().bitsGT(MVT::i32))
103 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
104 else if (Src.getValueType().bitsLT(MVT::i32))
105 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
106
107 Entry.Node = Src;
108 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
109 Entry.IsSExt = false;
110 Args.push_back(Entry);
111 } else {
112 Entry.Node = Src;
113 Args.push_back(Entry);
114
115 Entry.Node = Size;
116 Args.push_back(Entry);
117 }
118
119 char const *FunctionNames[4][3] = {
120 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
121 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
122 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
123 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
124 };
126 CLI.setDebugLoc(dl)
127 .setChain(Chain)
129 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
130 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
131 TLI->getPointerTy(DAG.getDataLayout())),
132 std::move(Args))
134 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
135
136 return CallResult.second;
137}
138
// Decides whether a memcpy or memset should be lowered to an inline
// tail-predicated (TP) loop.
//
// Subtarget    - queried for getMaxInlineSizeThreshold() on the memcpy path.
// DAG          - used only to reach the attributes of the enclosing function.
// ConstantSize - the transfer size as a constant node, or null when the size
//                is not a compile-time constant.
// Alignment    - compared against 4 bytes on the variable-size memcpy path.
// IsMemcpy     - true when called for memcpy, false when called for memset.
//
// Returns true when a TP loop should be generated.
//
// NOTE(review): this listing skips inner lines 144, 146 and 161, so the
// conditions guarding the first two returns and the right-hand operand of the
// final size comparison are not visible here -- confirm against the full
// source before relying on this block as written.
139static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
140 const SelectionDAG &DAG,
141 ConstantSDNode *ConstantSize,
142 Align Alignment, bool IsMemcpy) {
143 auto &F = DAG.getMachineFunction().getFunction();
145 return false;
147 return true;
148 // Do not generate an inline TP loop if optimization is disabled,
149 // or if optimization for size (-Os or -Oz) is on.
150 if (F.hasOptNone() || F.hasOptSize())
151 return false;
152 // If cli option is unset, for memset always generate inline TP.
153 // For memcpy, check some conditions
154 if (!IsMemcpy)
155 return true;
// Variable-size memcpy: accept only when the alignment is at least 4 bytes.
156 if (!ConstantSize && Alignment >= Align(4))
157 return true;
// Constant-size memcpy: accept when the size is above the subtarget's inline
// threshold and below an upper bound (the bound's operand is on the omitted
// inner line 161).
158 if (ConstantSize &&
159 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
160 ConstantSize->getZExtValue() <
162 return true;
163 return false;
164}
165
167 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
168 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
169 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
170 const ARMSubtarget &Subtarget =
172 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
173
174 if (Subtarget.hasMVEIntegerOps() &&
175 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
176 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
177 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
178
179 // Do repeated 4-byte loads and stores. To be improved.
180 // This requires 4-byte alignment.
181 if (Alignment < Align(4))
182 return SDValue();
183 // This requires the copy size to be a constant, preferably
184 // within a subtarget-specific limit.
185 if (!ConstantSize)
186 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
187 Alignment.value(), RTLIB::MEMCPY);
188 uint64_t SizeVal = ConstantSize->getZExtValue();
189 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
190 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
191 Alignment.value(), RTLIB::MEMCPY);
192
193 unsigned BytesLeft = SizeVal & 3;
194 unsigned NumMemOps = SizeVal >> 2;
195 unsigned EmittedNumMemOps = 0;
196 EVT VT = MVT::i32;
197 unsigned VTSize = 4;
198 unsigned i = 0;
199 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
200 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
201 SDValue TFOps[6];
202 SDValue Loads[6];
203 uint64_t SrcOff = 0, DstOff = 0;
204
205 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
206 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
207 // pressure on the general purpose registers. However this seems harder to map
208 // onto the register allocator's view of the world.
209
210 // The number of MEMCPY pseudo-instructions to emit. We use up to
211 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
212 // later on. This is a lower bound on the number of MEMCPY operations we must
213 // emit.
214 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
215
216 // Code size optimisation: do not inline memcpy if expansion results in
217 // more instructions than the library call.
218 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
219 return SDValue();
220 }
221
222 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
223
224 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
225 // Evenly distribute registers among MEMCPY operations to reduce register
226 // pressure.
227 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
228 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
229
230 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
231 DAG.getConstant(NumRegs, dl, MVT::i32));
232 Src = Dst.getValue(1);
233 Chain = Dst.getValue(2);
234
235 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
236 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
237
238 EmittedNumMemOps = NextEmittedNumMemOps;
239 }
240
241 if (BytesLeft == 0)
242 return Chain;
243
244 // Issue loads / stores for the trailing (1 - 3) bytes.
245 auto getRemainingValueType = [](unsigned BytesLeft) {
246 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
247 };
248 auto getRemainingSize = [](unsigned BytesLeft) {
249 return (BytesLeft >= 2) ? 2 : 1;
250 };
251
252 unsigned BytesLeftSave = BytesLeft;
253 i = 0;
254 while (BytesLeft) {
255 VT = getRemainingValueType(BytesLeft);
256 VTSize = getRemainingSize(BytesLeft);
257 Loads[i] = DAG.getLoad(VT, dl, Chain,
258 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
259 DAG.getConstant(SrcOff, dl, MVT::i32)),
260 SrcPtrInfo.getWithOffset(SrcOff));
261 TFOps[i] = Loads[i].getValue(1);
262 ++i;
263 SrcOff += VTSize;
264 BytesLeft -= VTSize;
265 }
266 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
267
268 i = 0;
269 BytesLeft = BytesLeftSave;
270 while (BytesLeft) {
271 VT = getRemainingValueType(BytesLeft);
272 VTSize = getRemainingSize(BytesLeft);
273 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
274 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
275 DAG.getConstant(DstOff, dl, MVT::i32)),
276 DstPtrInfo.getWithOffset(DstOff));
277 ++i;
278 DstOff += VTSize;
279 BytesLeft -= VTSize;
280 }
281 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
282}
283
285 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
286 SDValue Size, Align Alignment, bool isVolatile,
287 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
288 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
289 Alignment.value(), RTLIB::MEMMOVE);
290}
291
293 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
294 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
295 MachinePointerInfo DstPtrInfo) const {
296
297 const ARMSubtarget &Subtarget =
299
300 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
301
302 // Generate TP loop for llvm.memset
303 if (Subtarget.hasMVEIntegerOps() &&
304 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
305 false)) {
306 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
307 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
308 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
309 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
310 }
311
312 if (!AlwaysInline)
313 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
314 Alignment.value(), RTLIB::MEMSET);
315
316 return SDValue();
317}
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
Definition: ARMSubtarget.h:224
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:216
bool hasMinSize() const
Definition: ARMSubtarget.h:402
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
std::vector< ArgListEntry > ArgListTy
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)