LLVM  10.0.0svn
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ARMSelectionDAGInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMTargetMachine.h"
15 #include "llvm/IR/DerivedTypes.h"
16 using namespace llvm;
17 
18 #define DEBUG_TYPE "arm-selectiondag-info"
19 
20 // Emit, if possible, a specialized version of the given Libcall. Typically this
21 // means selecting the appropriately aligned version, but we also convert memset
22 // of 0 into memclr.
24  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
25  SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
26  const ARMSubtarget &Subtarget =
28  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
29 
30  // Only use a specialized AEABI function if the default version of this
31  // Libcall is an AEABI function.
32  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
33  return SDValue();
34 
35  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
36  // able to translate memset to memclr and use the value to index the function
37  // name array.
38  enum {
39  AEABI_MEMCPY = 0,
40  AEABI_MEMMOVE,
41  AEABI_MEMSET,
42  AEABI_MEMCLR
43  } AEABILibcall;
44  switch (LC) {
45  case RTLIB::MEMCPY:
46  AEABILibcall = AEABI_MEMCPY;
47  break;
48  case RTLIB::MEMMOVE:
49  AEABILibcall = AEABI_MEMMOVE;
50  break;
51  case RTLIB::MEMSET:
52  AEABILibcall = AEABI_MEMSET;
53  if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
54  if (ConstantSrc->getZExtValue() == 0)
55  AEABILibcall = AEABI_MEMCLR;
56  break;
57  default:
58  return SDValue();
59  }
60 
61  // Choose the most-aligned libcall variant that we can
62  enum {
63  ALIGN1 = 0,
64  ALIGN4,
65  ALIGN8
66  } AlignVariant;
67  if ((Align & 7) == 0)
68  AlignVariant = ALIGN8;
69  else if ((Align & 3) == 0)
70  AlignVariant = ALIGN4;
71  else
72  AlignVariant = ALIGN1;
73 
76  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
77  Entry.Node = Dst;
78  Args.push_back(Entry);
79  if (AEABILibcall == AEABI_MEMCLR) {
80  Entry.Node = Size;
81  Args.push_back(Entry);
82  } else if (AEABILibcall == AEABI_MEMSET) {
83  // Adjust parameters for memset, EABI uses format (ptr, size, value),
84  // GNU library uses (ptr, value, size)
85  // See RTABI section 4.3.4
86  Entry.Node = Size;
87  Args.push_back(Entry);
88 
89  // Extend or truncate the argument to be an i32 value for the call.
90  if (Src.getValueType().bitsGT(MVT::i32))
91  Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
92  else if (Src.getValueType().bitsLT(MVT::i32))
93  Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
94 
95  Entry.Node = Src;
96  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
97  Entry.IsSExt = false;
98  Args.push_back(Entry);
99  } else {
100  Entry.Node = Src;
101  Args.push_back(Entry);
102 
103  Entry.Node = Size;
104  Args.push_back(Entry);
105  }
106 
107  char const *FunctionNames[4][3] = {
108  { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
109  { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
110  { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
111  { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
112  };
114  CLI.setDebugLoc(dl)
115  .setChain(Chain)
116  .setLibCallee(
117  TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
118  DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
119  TLI->getPointerTy(DAG.getDataLayout())),
120  std::move(Args))
121  .setDiscardResult();
122  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
123 
124  return CallResult.second;
125 }
126 
128  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
129  SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
130  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
131  const ARMSubtarget &Subtarget =
133  // Do repeated 4-byte loads and stores. To be improved.
134  // This requires 4-byte alignment.
135  if ((Align & 3) != 0)
136  return SDValue();
137  // This requires the copy size to be a constant, preferably
138  // within a subtarget-specific limit.
139  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
140  if (!ConstantSize)
141  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
142  RTLIB::MEMCPY);
143  uint64_t SizeVal = ConstantSize->getZExtValue();
144  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
145  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
146  RTLIB::MEMCPY);
147 
148  unsigned BytesLeft = SizeVal & 3;
149  unsigned NumMemOps = SizeVal >> 2;
150  unsigned EmittedNumMemOps = 0;
151  EVT VT = MVT::i32;
152  unsigned VTSize = 4;
153  unsigned i = 0;
154  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
155  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
156  SDValue TFOps[6];
157  SDValue Loads[6];
158  uint64_t SrcOff = 0, DstOff = 0;
159 
160  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
161  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
162  // pressure on the general purpose registers. However this seems harder to map
163  // onto the register allocator's view of the world.
164 
165  // The number of MEMCPY pseudo-instructions to emit. We use up to
166 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
167  // later on. This is a lower bound on the number of MEMCPY operations we must
168  // emit.
169  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
170 
171  // Code size optimisation: do not inline memcpy if expansion results in
172 // more instructions than the library call.
173  if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
174  return SDValue();
175  }
176 
178 
179  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
180  // Evenly distribute registers among MEMCPY operations to reduce register
181  // pressure.
182  unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
183  unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
184 
185  Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
186  DAG.getConstant(NumRegs, dl, MVT::i32));
187  Src = Dst.getValue(1);
188  Chain = Dst.getValue(2);
189 
190  DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
191  SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
192 
193  EmittedNumMemOps = NextEmittedNumMemOps;
194  }
195 
196  if (BytesLeft == 0)
197  return Chain;
198 
199  // Issue loads / stores for the trailing (1 - 3) bytes.
200  auto getRemainingValueType = [](unsigned BytesLeft) {
201  return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
202  };
203  auto getRemainingSize = [](unsigned BytesLeft) {
204  return (BytesLeft >= 2) ? 2 : 1;
205  };
206 
207  unsigned BytesLeftSave = BytesLeft;
208  i = 0;
209  while (BytesLeft) {
210  VT = getRemainingValueType(BytesLeft);
211  VTSize = getRemainingSize(BytesLeft);
212  Loads[i] = DAG.getLoad(VT, dl, Chain,
213  DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
214  DAG.getConstant(SrcOff, dl, MVT::i32)),
215  SrcPtrInfo.getWithOffset(SrcOff));
216  TFOps[i] = Loads[i].getValue(1);
217  ++i;
218  SrcOff += VTSize;
219  BytesLeft -= VTSize;
220  }
221  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
222  makeArrayRef(TFOps, i));
223 
224  i = 0;
225  BytesLeft = BytesLeftSave;
226  while (BytesLeft) {
227  VT = getRemainingValueType(BytesLeft);
228  VTSize = getRemainingSize(BytesLeft);
229  TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
230  DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
231  DAG.getConstant(DstOff, dl, MVT::i32)),
232  DstPtrInfo.getWithOffset(DstOff));
233  ++i;
234  DstOff += VTSize;
235  BytesLeft -= VTSize;
236  }
237  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
238  makeArrayRef(TFOps, i));
239 }
240 
242  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
243  SDValue Size, unsigned Align, bool isVolatile,
244  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
245  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
246  RTLIB::MEMMOVE);
247 }
248 
250  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
251  SDValue Size, unsigned Align, bool isVolatile,
252  MachinePointerInfo DstPtrInfo) const {
253  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
254  RTLIB::MEMSET);
255 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
EVT getValueType() const
Return the ValueType of the referenced return value.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:529
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
bool isThumb1Only() const
Definition: ARMSubtarget.h:761
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getExternalSymbol(const char *Sym, EVT VT)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
CallLoweringInfo & setChain(SDValue InChain)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:417
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:414
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:509
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:228
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:165
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:33
This structure contains all information that is necessary for lowering calls.
This class contains a discriminated union of information about pointers in memory operands...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:49
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
MachinePointerInfo getWithOffset(int64_t O) const
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:240
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:507
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:180
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
SDValue getValue(unsigned R) const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool hasMinSize() const
Definition: ARMSubtarget.h:760
static bool isVolatile(Instruction *Inst)
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:513
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
LLVMContext * getContext() const
Definition: SelectionDAG.h:424
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.