LLVM  3.7.0
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ARMSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMTargetMachine.h"
16 #include "llvm/IR/DerivedTypes.h"
17 using namespace llvm;
18 
19 #define DEBUG_TYPE "arm-selectiondag-info"
20 
21 // Emit, if possible, a specialized version of the given Libcall. Typically this
22 // means selecting the appropriately aligned version, but we also convert memset
23 // of 0 into memclr.
//
// Returns an empty SDValue() when the default name registered for LC does not
// start with "__aeabi", or when LC is not one of MEMCPY / MEMMOVE / MEMSET.
// Otherwise a call node is built and the second element of the LowerCallTo
// result pair is returned (presumably the output chain -- confirm).
//
// NOTE(review): the declarator line(s) that precede this parameter list are
// absent from this listing (the embedded numbering jumps from 23 to 26).
26  SDValue Chain,
27  SDValue Dst, SDValue Src,
28  SDValue Size, unsigned Align,
29  RTLIB::Libcall LC) const {
    // NOTE(review): the initializer of Subtarget (listing line 31) is missing
    // from this listing.
30  const ARMSubtarget &Subtarget =
32  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
33 
34  // Only use a specialized AEABI function if the default version of this
35  // Libcall is an AEABI function.
    // Only the first 7 characters of the name are compared.
36  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
37  return SDValue();
38 
39  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
40  // able to translate memset to memclr and use the value to index the function
41  // name array.
    // The enumerator values are used as the row index of FunctionNames below,
    // so their order must match the order of the rows in that table.
42  enum {
43  AEABI_MEMCPY = 0,
44  AEABI_MEMMOVE,
45  AEABI_MEMSET,
46  AEABI_MEMCLR
47  } AEABILibcall;
48  switch (LC) {
49  case RTLIB::MEMCPY:
50  AEABILibcall = AEABI_MEMCPY;
51  break;
52  case RTLIB::MEMMOVE:
53  AEABILibcall = AEABI_MEMMOVE;
54  break;
55  case RTLIB::MEMSET:
56  AEABILibcall = AEABI_MEMSET;
    // A memset whose value operand is a constant node equal to 0 is
    // redirected to the memclr entry point instead.
57  if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
58  if (ConstantSrc->getZExtValue() == 0)
59  AEABILibcall = AEABI_MEMCLR;
60  break;
61  default:
    // Any other libcall has no specialized variant here.
62  return SDValue();
63  }
64 
65  // Choose the most-aligned libcall variant that we can
    // The enumerator values are used as the column index of FunctionNames.
    // NOTE(review): (Align & 7) == 0 also holds for Align == 0, so that value
    // selects the 8-byte variant -- confirm callers never pass 0 to mean
    // "unknown alignment".
66  enum {
67  ALIGN1 = 0,
68  ALIGN4,
69  ALIGN8
70  } AlignVariant;
71  if ((Align & 7) == 0)
72  AlignVariant = ALIGN8;
73  else if ((Align & 3) == 0)
74  AlignVariant = ALIGN4;
75  else
76  AlignVariant = ALIGN1;
77 
    // NOTE(review): the declarations of Args and Entry (listing lines 78-79)
    // are missing from this listing.
    // The first argument is always the destination, typed as the pointer-sized
    // integer type of the current data layout.
80  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
81  Entry.Node = Dst;
82  Args.push_back(Entry);
83  if (AEABILibcall == AEABI_MEMCLR) {
    // memclr: only (Dst, Size) are pushed; no value argument is passed.
84  Entry.Node = Size;
85  Args.push_back(Entry);
86  } else if (AEABILibcall == AEABI_MEMSET) {
87  // Adjust parameters for memset, EABI uses format (ptr, size, value),
88  // GNU library uses (ptr, value, size)
89  // See RTABI section 4.3.4
90  Entry.Node = Size;
91  Args.push_back(Entry);
92 
93  // Extend or truncate the argument to be an i32 value for the call.
94  if (Src.getValueType().bitsGT(MVT::i32))
95  Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
96  else if (Src.getValueType().bitsLT(MVT::i32))
97  Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
98 
    // The value argument switches the entry type to i32 and clears isSExt.
99  Entry.Node = Src;
100  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
101  Entry.isSExt = false;
102  Args.push_back(Entry);
103  } else {
    // memcpy / memmove: arguments are pushed in (Dst, Src, Size) order, all
    // with the pointer-sized integer type set above.
104  Entry.Node = Src;
105  Args.push_back(Entry);
106 
107  Entry.Node = Size;
108  Args.push_back(Entry);
109  }
110 
    // Symbol names indexed as [AEABILibcall][AlignVariant].
111  char const *FunctionNames[4][3] = {
112  { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
113  { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
114  { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
115  { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
116  };
    // NOTE(review): the declaration of CLI (listing line 117) is missing from
    // this listing.
    // The call uses the calling convention registered for LC, a void result
    // type, and the selected symbol as callee. The trailing 0 is passed as the
    // final (FixedArgs) parameter of setCallee, and the result is discarded.
118  CLI.setDebugLoc(dl)
119  .setChain(Chain)
120  .setCallee(
121  TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
122  DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
123  TLI->getPointerTy(DAG.getDataLayout())),
124  std::move(Args), 0)
125  .setDiscardResult();
126  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
127 
    // Only the second element of the pair is handed back to the caller.
128  return CallResult.second;
129 }
130 
// Expand a memory copy inline as batches of 4-byte loads followed by 4-byte
// stores, then 2-byte / 1-byte operations for the remaining 1-3 bytes.
//
// Outcomes visible in this body:
//  - Align not a multiple of 4: returns an empty SDValue().
//  - Size not a constant node: defers to EmitSpecializedLibcall with
//    RTLIB::MEMCPY.
//  - Constant Size above Subtarget.getMaxInlineSizeThreshold() and
//    AlwaysInline is false: defers to EmitSpecializedLibcall as well.
//  - Otherwise: returns the chain that results from the emitted loads/stores.
//
// NOTE(review): the declarator line naming this function (listing line 132)
// is absent from this listing; the RTLIB::MEMCPY calls and the AlwaysInline
// parameter indicate it is EmitTargetCodeForMemcpy.
131 SDValue
133  SDValue Chain,
134  SDValue Dst, SDValue Src,
135  SDValue Size, unsigned Align,
136  bool isVolatile, bool AlwaysInline,
137  MachinePointerInfo DstPtrInfo,
138  MachinePointerInfo SrcPtrInfo) const {
    // NOTE(review): the initializer of Subtarget (listing line 140) is missing
    // from this listing.
139  const ARMSubtarget &Subtarget =
141  // Do repeated 4-byte loads and stores. To be improved.
142  // This requires 4-byte alignment.
143  if ((Align & 3) != 0)
144  return SDValue();
145  // This requires the copy size to be a constant, preferably
146  // within a subtarget-specific limit.
147  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
148  if (!ConstantSize)
149  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
150  RTLIB::MEMCPY);
151  uint64_t SizeVal = ConstantSize->getZExtValue();
    // AlwaysInline bypasses the size threshold check.
152  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
153  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
154  RTLIB::MEMCPY);
155 
    // BytesLeft is the size modulo 4 (0-3 trailing bytes); NumMemOps is the
    // number of whole 4-byte words to copy.
156  unsigned BytesLeft = SizeVal & 3;
157  unsigned NumMemOps = SizeVal >> 2;
158  unsigned EmittedNumMemOps = 0;
159  EVT VT = MVT::i32;
160  unsigned VTSize = 4;
161  unsigned i = 0;
162  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
163  const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
    // Both scratch arrays hold 6 entries, the larger of the two batch limits.
164  SDValue TFOps[6];
165  SDValue Loads[6];
166  uint64_t SrcOff = 0, DstOff = 0;
167 
168  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
169  // same number of stores. The loads and stores will get combined into
170  // ldm/stm later on.
171  while (EmittedNumMemOps < NumMemOps) {
    // Load phase: every load in the batch hangs off the same incoming Chain;
    // value 1 of each load is collected in TFOps.
172  for (i = 0;
173  i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
174  Loads[i] = DAG.getLoad(VT, dl, Chain,
175  DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
176  DAG.getConstant(SrcOff, dl, MVT::i32)),
177  SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
178  false, false, 0);
179  TFOps[i] = Loads[i].getValue(1);
180  SrcOff += VTSize;
181  }
    // Merge the collected load tokens so the stores below depend on all loads
    // of this batch.
182  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
183  makeArrayRef(TFOps, i));
184 
    // Store phase: same loop bound as the load phase (EmittedNumMemOps has not
    // changed yet), so i ends at the same count and Loads[0..i) are all valid.
185  for (i = 0;
186  i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
187  TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
188  DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
189  DAG.getConstant(DstOff, dl, MVT::i32)),
190  DstPtrInfo.getWithOffset(DstOff),
191  isVolatile, false, 0);
192  DstOff += VTSize;
193  }
194  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
195  makeArrayRef(TFOps, i));
196 
    // i still holds the batch size computed by the store loop.
197  EmittedNumMemOps += i;
198  }
199 
    // Size was a multiple of 4: nothing left to copy.
200  if (BytesLeft == 0)
201  return Chain;
202 
203  // Issue loads / stores for the trailing (1 - 3) bytes.
    // Each step uses an i16 access while at least 2 bytes remain, else an i8
    // access, so at most two loads and two stores are emitted here.
    // BytesLeft is saved because the load loop counts it down to zero and the
    // store loop must replay the same sequence of access sizes.
204  unsigned BytesLeftSave = BytesLeft;
205  i = 0;
206  while (BytesLeft) {
207  if (BytesLeft >= 2) {
208  VT = MVT::i16;
209  VTSize = 2;
210  } else {
211  VT = MVT::i8;
212  VTSize = 1;
213  }
214 
    // NOTE(review): this load passes a literal false where the word loop
    // above passes isVolatile -- confirm that is intended.
215  Loads[i] = DAG.getLoad(VT, dl, Chain,
216  DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
217  DAG.getConstant(SrcOff, dl, MVT::i32)),
218  SrcPtrInfo.getWithOffset(SrcOff),
219  false, false, false, 0);
220  TFOps[i] = Loads[i].getValue(1);
221  ++i;
222  SrcOff += VTSize;
223  BytesLeft -= VTSize;
224  }
225  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
226  makeArrayRef(TFOps, i));
227 
228  i = 0;
229  BytesLeft = BytesLeftSave;
230  while (BytesLeft) {
231  if (BytesLeft >= 2) {
232  VT = MVT::i16;
233  VTSize = 2;
234  } else {
235  VT = MVT::i8;
236  VTSize = 1;
237  }
238 
    // NOTE(review): as with the trailing loads, the volatile flag is a literal
    // false here rather than isVolatile -- confirm.
239  TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
240  DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
241  DAG.getConstant(DstOff, dl, MVT::i32)),
242  DstPtrInfo.getWithOffset(DstOff), false, false, 0);
243  ++i;
244  DstOff += VTSize;
245  BytesLeft -= VTSize;
246  }
    // The result is a TokenFactor over the trailing stores.
247  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
248  makeArrayRef(TFOps, i));
249 }
250 
251 
// Forwards the operation to EmitSpecializedLibcall and returns its result
// unchanged. No inline expansion is attempted here; isVolatile, DstPtrInfo and
// SrcPtrInfo are not referenced in the visible body.
//
// NOTE(review): the declarator lines (listing lines 252-253) and the final
// call argument (listing line 261) are absent from this listing. The parameter
// list (source and destination pointer info, no AlwaysInline) suggests this is
// EmitTargetCodeForMemmove and that the missing argument is the memmove
// libcall id -- confirm against the original file.
254  SDValue Chain,
255  SDValue Dst, SDValue Src,
256  SDValue Size, unsigned Align,
257  bool isVolatile,
258  MachinePointerInfo DstPtrInfo,
259  MachinePointerInfo SrcPtrInfo) const {
260  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
262 }
263 
264 
// Forwards the operation to EmitSpecializedLibcall with RTLIB::MEMSET and
// returns its result unchanged. Src is passed through in the position
// EmitSpecializedLibcall treats as the value to store. isVolatile and
// DstPtrInfo are not referenced in the visible body.
//
// NOTE(review): the declarator lines (listing lines 265-266) are absent from
// this listing; the RTLIB::MEMSET argument indicates this is
// EmitTargetCodeForMemset.
267  SDValue Chain, SDValue Dst,
268  SDValue Src, SDValue Size,
269  unsigned Align, bool isVolatile,
270  MachinePointerInfo DstPtrInfo) const {
271  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
272  RTLIB::MEMSET);
273 }
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, unsigned FixedArgs=-1)
SDValue getValue(unsigned R) const
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
EmitTargetCodeForMemmove - Emit target-specific code that performs a memmove.
CallLoweringInfo & setDebugLoc(SDLoc dl)
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:265
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
bool isThumb1Only() const
Definition: ARMSubtarget.h:405
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:189
SDValue getExternalSymbol(const char *Sym, EVT VT)
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
CallLoweringInfo & setChain(SDValue InChain)
MachinePointerInfo getWithOffset(int64_t O) const
load Combine Adjacent Loads
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:225
EVT - Extended Value Type.
Definition: ValueTypes.h:31
std::vector< ArgListEntry > ArgListTy
This structure contains all information that is necessary for lowering calls.
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:177
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:694
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation suitable for instruction selection.
Definition: SelectionDAG.h:179
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
Definition: ARMSubtarget.h:248
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
EmitTargetCodeForMemcpy - Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
EmitTargetCodeForMemset - Emit target-specific code that performs a memset.
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
static bool isVolatile(Instruction *Inst)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
uint64_t getZExtValue() const