LLVM 23.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMSelectionDAGInfo.h"
17
18#define GET_SDNODE_DESC
19#include "ARMGenSDNodeInfo.inc"
20
21using namespace llvm;
22
23#define DEBUG_TYPE "arm-selectiondag-info"
24
26 "arm-memtransfer-tploop", cl::Hidden,
27 cl::desc("Control conversion of memcpy to "
28 "Tail predicated loops (WLSTP)"),
31 "Don't convert memcpy to TP loop."),
32 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
33 "Always convert memcpy to TP loop."),
35 "Allow (may be subject to certain conditions) "
36 "conversion of memcpy to TP loop.")));
37
40
84
85bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
86 // These nodes don't have corresponding entries in *.td files yet.
87 if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
89 return true;
90
92}
93
95 const SDNode *N) const {
96 switch (N->getOpcode()) {
97 default:
98 break;
99 case ARMISD::WIN__DBZCHK:
100 // invalid number of results; expected 2, got 1
101 case ARMISD::WIN__CHKSTK:
102 // invalid number of results; expected 1, got 2
103 case ARMISD::COPY_STRUCT_BYVAL:
104 // invalid number of operands; expected 6, got 5
105 case ARMISD::MEMCPY:
106 // invalid number of operands; expected 5, got 4
107 case ARMISD::VMOVRRD:
108 // operand #0 must have type f64, but has type v1i64/v4f16/v8i8
109 case ARMISD::VMOVIMM:
110 // operand #0 must have type i32, but has type i16
111 return;
112 }
113
115}
116
117// Emit, if possible, a specialized version of the given Libcall. Typically this
118// means selecting the appropriately aligned version, but we also convert memset
119// of 0 into memclr.
121 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
122 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
123 const ARMSubtarget &Subtarget =
125 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
126
127 // Only use a specialized AEABI function if the default version of this
128 // Libcall is an AEABI function.
129 //
130 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
131 // able to translate memset to memclr and use the value to index the function
132 // name array.
133 enum {
134 AEABI_MEMCPY = 0,
135 AEABI_MEMMOVE,
136 AEABI_MEMSET,
137 AEABI_MEMCLR
138 } AEABILibcall;
139 switch (LC) {
140 case RTLIB::MEMCPY:
141 if (DAG.getLibcalls().getLibcallImpl(LC) != RTLIB::impl___aeabi_memcpy)
142 return SDValue();
143
144 AEABILibcall = AEABI_MEMCPY;
145 break;
146 case RTLIB::MEMMOVE:
147 if (DAG.getLibcalls().getLibcallImpl(LC) != RTLIB::impl___aeabi_memmove)
148 return SDValue();
149
150 AEABILibcall = AEABI_MEMMOVE;
151 break;
152 case RTLIB::MEMSET:
153 if (DAG.getLibcalls().getLibcallImpl(LC) != RTLIB::impl___aeabi_memset)
154 return SDValue();
155
156 AEABILibcall = AEABI_MEMSET;
157 if (isNullConstant(Src))
158 AEABILibcall = AEABI_MEMCLR;
159 break;
160 default:
161 return SDValue();
162 }
163
164 // Choose the most-aligned libcall variant that we can
165 enum {
166 ALIGN1 = 0,
167 ALIGN4,
168 ALIGN8
169 } AlignVariant;
170 if ((Align & 7) == 0)
171 AlignVariant = ALIGN8;
172 else if ((Align & 3) == 0)
173 AlignVariant = ALIGN4;
174 else
175 AlignVariant = ALIGN1;
176
178 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
179 Args.emplace_back(Dst, IntPtrTy);
180 if (AEABILibcall == AEABI_MEMCLR) {
181 Args.emplace_back(Size, IntPtrTy);
182 } else if (AEABILibcall == AEABI_MEMSET) {
183 // Adjust parameters for memset: EABI uses the format (ptr, size, value),
184 // whereas the GNU library uses (ptr, value, size).
185 // See RTABI section 4.3.4
186 Args.emplace_back(Size, IntPtrTy);
187
188 // Extend or truncate the argument to be an i32 value for the call.
189 if (Src.getValueType().bitsGT(MVT::i32))
190 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
191 else if (Src.getValueType().bitsLT(MVT::i32))
192 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
193
196 Entry.IsSExt = false;
197 Args.push_back(Entry);
198 } else {
199 Args.emplace_back(Src, IntPtrTy);
200 Args.emplace_back(Size, IntPtrTy);
201 }
202
203 static const RTLIB::Libcall FunctionImpls[4][3] = {
204 {RTLIB::MEMCPY, RTLIB::AEABI_MEMCPY4, RTLIB::AEABI_MEMCPY8},
205 {RTLIB::MEMMOVE, RTLIB::AEABI_MEMMOVE4, RTLIB::AEABI_MEMMOVE8},
206 {RTLIB::MEMSET, RTLIB::AEABI_MEMSET4, RTLIB::AEABI_MEMSET8},
207 {RTLIB::AEABI_MEMCLR, RTLIB::AEABI_MEMCLR4, RTLIB::AEABI_MEMCLR8}};
208
209 RTLIB::Libcall NewLC = FunctionImpls[AEABILibcall][AlignVariant];
210 RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(NewLC);
211 if (LCImpl == RTLIB::Unsupported)
212 return SDValue();
213
215 CLI.setDebugLoc(dl)
216 .setChain(Chain)
220 DAG.getExternalSymbol(LCImpl, TLI->getPointerTy(DAG.getDataLayout())),
221 std::move(Args))
223 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
224
225 return CallResult.second;
226}
227
228static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
229 const SelectionDAG &DAG,
230 ConstantSDNode *ConstantSize,
231 Align Alignment, bool IsMemcpy) {
232 auto &F = DAG.getMachineFunction().getFunction();
234 return false;
236 return true;
237 // Do not generate inline TP loop if optimizations are disabled,
238 // or if optimization for size (-Os or -Oz) is on.
239 if (F.hasOptNone() || F.hasOptSize())
240 return false;
241 // If cli option is unset, for memset always generate inline TP.
242 // For memcpy, check some conditions
243 if (!IsMemcpy)
244 return true;
245 if (!ConstantSize && Alignment >= Align(4))
246 return true;
247 if (ConstantSize &&
248 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
249 ConstantSize->getZExtValue() <
251 return true;
252 return false;
253}
254
256 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
257 SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile,
258 bool AlwaysInline, MachinePointerInfo DstPtrInfo,
259 MachinePointerInfo SrcPtrInfo) const {
260 Align Alignment = std::min(DstAlign, SrcAlign);
261 const ARMSubtarget &Subtarget =
264
265 if (Subtarget.hasMVEIntegerOps() &&
266 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
267 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
268 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
269
270 // Do repeated 4-byte loads and stores. To be improved.
271 // This requires 4-byte alignment.
272 if (Alignment < Align(4))
273 return SDValue();
274 // This requires the copy size to be a constant, preferably
275 // within a subtarget-specific limit.
276 if (!ConstantSize)
277 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
278 Alignment.value(), RTLIB::MEMCPY);
279 uint64_t SizeVal = ConstantSize->getZExtValue();
280 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
281 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
282 Alignment.value(), RTLIB::MEMCPY);
283
284 unsigned BytesLeft = SizeVal & 3;
285 unsigned NumMemOps = SizeVal >> 2;
286 unsigned EmittedNumMemOps = 0;
287 EVT VT = MVT::i32;
288 unsigned VTSize = 4;
289 unsigned i = 0;
290 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
291 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
292 SDValue TFOps[6];
293 SDValue Loads[6];
294 uint64_t SrcOff = 0, DstOff = 0;
295
296 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
297 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
298 // pressure on the general purpose registers. However this seems harder to map
299 // onto the register allocator's view of the world.
300
301 // The number of MEMCPY pseudo-instructions to emit. We use up to
302 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
303 // later on. This is a lower bound on the number of MEMCPY operations we must
304 // emit.
305 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
306
307 // Code size optimisation: do not inline memcpy if expansion results in
308 // more instructions than the library call.
309 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
310 return SDValue();
311 }
312
313 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
314
315 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
316 // Evenly distribute registers among MEMCPY operations to reduce register
317 // pressure.
318 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
319 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
320
321 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
322 DAG.getConstant(NumRegs, dl, MVT::i32));
323 Src = Dst.getValue(1);
324 Chain = Dst.getValue(2);
325
326 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
327 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
328
329 EmittedNumMemOps = NextEmittedNumMemOps;
330 }
331
332 if (BytesLeft == 0)
333 return Chain;
334
335 // Issue loads / stores for the trailing (1 - 3) bytes.
336 auto getRemainingValueType = [](unsigned BytesLeft) {
337 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
338 };
339 auto getRemainingSize = [](unsigned BytesLeft) {
340 return (BytesLeft >= 2) ? 2 : 1;
341 };
342
343 unsigned BytesLeftSave = BytesLeft;
344 i = 0;
345 while (BytesLeft) {
346 VT = getRemainingValueType(BytesLeft);
347 VTSize = getRemainingSize(BytesLeft);
348 Loads[i] = DAG.getLoad(VT, dl, Chain,
349 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
350 DAG.getConstant(SrcOff, dl, MVT::i32)),
351 SrcPtrInfo.getWithOffset(SrcOff));
352 TFOps[i] = Loads[i].getValue(1);
353 ++i;
354 SrcOff += VTSize;
355 BytesLeft -= VTSize;
356 }
357 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
358
359 i = 0;
360 BytesLeft = BytesLeftSave;
361 while (BytesLeft) {
362 VT = getRemainingValueType(BytesLeft);
363 VTSize = getRemainingSize(BytesLeft);
364 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
365 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
366 DAG.getConstant(DstOff, dl, MVT::i32)),
367 DstPtrInfo.getWithOffset(DstOff));
368 ++i;
369 DstOff += VTSize;
370 BytesLeft -= VTSize;
371 }
372 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
373}
374
376 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
377 SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile,
378 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
379 Align Alignment = std::min(DstAlign, SrcAlign);
380 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
381 Alignment.value(), RTLIB::MEMMOVE);
382}
383
385 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
386 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
387 MachinePointerInfo DstPtrInfo) const {
388
389 const ARMSubtarget &Subtarget =
391
393
394 // Generate TP loop for llvm.memset
395 if (Subtarget.hasMVEIntegerOps() &&
396 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
397 false)) {
398 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
399 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
400 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
401 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
402 }
403
404 if (!AlwaysInline)
405 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
406 Alignment.value(), RTLIB::MEMSET);
407
408 return SDValue();
409}
return SDValue()
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
static cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfoImplBase conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define MAKE_CASE(V)
const char * getTargetNodeName(unsigned Opcode) const override
Returns the name of the given target-specific opcode, suitable for debug printing.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override
Checks that the given target-specific node is valid. Aborts if it is not.
bool isTargetMemoryOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has a memory operand.
bool isThumb1Only() const
const ARMTargetLowering * getTargetLowering() const override
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
bool hasMinSize() const
uint64_t getZExtValue() const
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
const char * getTargetNodeName(unsigned Opcode) const override
Returns the name of the given target-specific opcode, suitable for debug printing.
SelectionDAGGenTargetInfo(const SDNodeInfo &GenNodeInfo)
bool isTargetMemoryOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has a memory operand.
void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override
Checks that the given target-specific node is valid. Aborts if it is not.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
std::vector< ArgListEntry > ArgListTy
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
ArrayRef(const T &OneElt) -> ArrayRef< T >
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)