LLVM  6.0.0svn
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ARMSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMTargetMachine.h"
16 #include "llvm/IR/DerivedTypes.h"
17 using namespace llvm;
18 
19 #define DEBUG_TYPE "arm-selectiondag-info"
20 
21 // Emit, if possible, a specialized version of the given Libcall. Typically this
22 // means selecting the appropriately aligned version, but we also convert memset
23 // of 0 into memclr.
//
// Returns an empty SDValue() when no specialized call is emitted: either the
// default name for LC does not start with "__aeabi", or LC is not one of
// RTLIB::MEMCPY, RTLIB::MEMMOVE or RTLIB::MEMSET. Otherwise returns
// CallResult.second from LowerCallTo (presumably the call's output chain --
// confirm against TargetLowering::LowerCallTo).
//
// For RTLIB::MEMSET, Src carries the fill value rather than a pointer.
//
// NOTE(review): the embedded line numbering of this listing skips 24, 28,
// 75-76 and 114, so the opening line of the signature, the initializer of
// Subtarget and (presumably) the declarations of Args, Entry and CLI are not
// visible here.
25  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
26  SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
27  const ARMSubtarget &Subtarget =
29  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
30 
31  // Only use a specialized AEABI function if the default version of this
32  // Libcall is an AEABI function.
33  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
34  return SDValue();
35 
36  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
37  // able to translate memset to memclr and use the value to index the function
38  // name array.
39  enum {
40  AEABI_MEMCPY = 0,
41  AEABI_MEMMOVE,
42  AEABI_MEMSET,
43  AEABI_MEMCLR
44  } AEABILibcall;
45  switch (LC) {
46  case RTLIB::MEMCPY:
47  AEABILibcall = AEABI_MEMCPY;
48  break;
49  case RTLIB::MEMMOVE:
50  AEABILibcall = AEABI_MEMMOVE;
51  break;
52  case RTLIB::MEMSET:
53  AEABILibcall = AEABI_MEMSET;
// A memset whose fill value is the constant 0 is redirected to memclr.
54  if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
55  if (ConstantSrc->getZExtValue() == 0)
56  AEABILibcall = AEABI_MEMCLR;
57  break;
58  default:
// Any other libcall has no specialized AEABI variant here.
59  return SDValue();
60  }
61 
62  // Choose the most-aligned libcall variant that we can
// Align is tested for being a multiple of 8 first, then of 4; any other
// value selects the first (unsuffixed) name in each FunctionNames row.
63  enum {
64  ALIGN1 = 0,
65  ALIGN4,
66  ALIGN8
67  } AlignVariant;
68  if ((Align & 7) == 0)
69  AlignVariant = ALIGN8;
70  else if ((Align & 3) == 0)
71  AlignVariant = ALIGN4;
72  else
73  AlignVariant = ALIGN1;
74 
// The first argument of every variant is Dst, typed as the data layout's
// pointer-sized integer type.
77  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
78  Entry.Node = Dst;
79  Args.push_back(Entry);
80  if (AEABILibcall == AEABI_MEMCLR) {
// memclr is passed only (ptr, size); no fill value argument is pushed.
81  Entry.Node = Size;
82  Args.push_back(Entry);
83  } else if (AEABILibcall == AEABI_MEMSET) {
84  // Adjust parameters for memset, EABI uses format (ptr, size, value),
85  // GNU library uses (ptr, value, size)
86  // See RTABI section 4.3.4
87  Entry.Node = Size;
88  Args.push_back(Entry);
89 
90  // Extend or truncate the argument to be an i32 value for the call.
91  if (Src.getValueType().bitsGT(MVT::i32))
92  Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
93  else if (Src.getValueType().bitsLT(MVT::i32))
94  Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
95 
// The fill value is passed as an i32 and flagged as not sign-extended.
96  Entry.Node = Src;
97  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
98  Entry.IsSExt = false;
99  Args.push_back(Entry);
100  } else {
// memcpy / memmove: arguments are pushed in the order (Dst, Src, Size).
101  Entry.Node = Src;
102  Args.push_back(Entry);
103 
104  Entry.Node = Size;
105  Args.push_back(Entry);
106  }
107 
// Indexed below as FunctionNames[AEABILibcall][AlignVariant], so the row and
// column order must stay in step with the two enums above.
108  char const *FunctionNames[4][3] = {
109  { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
110  { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
111  { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
112  { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
113  };
// The callee is declared as returning void and uses the calling convention
// that TLI reports for the original libcall LC.
115  CLI.setDebugLoc(dl)
116  .setChain(Chain)
117  .setLibCallee(
118  TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
119  DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
120  TLI->getPointerTy(DAG.getDataLayout())),
121  std::move(Args))
122  .setDiscardResult();
123  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
124 
125  return CallResult.second;
126 }
127 
// Target-specific memcpy emission (the parameter list matches
// EmitTargetCodeForMemcpy).
//
// Outcomes, in the order they are checked:
//  - Align is not a multiple of 4: returns an empty SDValue().
//  - Size is not a constant, or AlwaysInline is false and the constant exceeds
//    Subtarget.getMaxInlineSizeThreshold(): returns the result of
//    EmitSpecializedLibcall for RTLIB::MEMCPY.
//  - More than one ARMISD::MEMCPY node would be needed and the function is
//    optimized for minimum size: returns an empty SDValue().
//  - Otherwise: emits ARMISD::MEMCPY nodes for the whole 4-byte words, then
//    i16/i8 loads and stores for the 1-3 trailing bytes, and returns the
//    resulting chain.
//
// isVolatile is not referenced in the visible lines.
//
// NOTE(review): the embedded line numbering of this listing skips 128, 133
// and 178, so the opening line of the signature, the initializer of Subtarget
// and (presumably) the declaration of VTs are not visible here.
129  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
130  SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
131  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
132  const ARMSubtarget &Subtarget =
134  // Do repeated 4-byte loads and stores. To be improved.
135  // This requires 4-byte alignment.
136  if ((Align & 3) != 0)
137  return SDValue();
138  // This requires the copy size to be a constant, preferably
139  // within a subtarget-specific limit.
140  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
141  if (!ConstantSize)
142  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
143  RTLIB::MEMCPY);
144  uint64_t SizeVal = ConstantSize->getZExtValue();
145  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
146  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
147  RTLIB::MEMCPY);
148 
// Split the size into whole 4-byte words (NumMemOps) and 0-3 trailing bytes
// (BytesLeft).
149  unsigned BytesLeft = SizeVal & 3;
150  unsigned NumMemOps = SizeVal >> 2;
151  unsigned EmittedNumMemOps = 0;
152  EVT VT = MVT::i32;
153  unsigned VTSize = 4;
154  unsigned i = 0;
155  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
156  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
157  SDValue TFOps[6];
158  SDValue Loads[6];
159  uint64_t SrcOff = 0, DstOff = 0;
160 
161  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
162  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
163  // pressure on the general purpose registers. However this seems harder to map
164  // onto the register allocator's view of the world.
165 
166  // The number of MEMCPY pseudo-instructions to emit. We use up to
167  // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
168  // later on. This is a lower bound on the number of MEMCPY operations we must
169  // emit.
// Ceiling division; evaluates to 0 when NumMemOps is 0.
170  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
171 
172  // Code size optimisation: do not inline memcpy if expansion results in
173  // more instructions than the library call.
174  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
175  return SDValue();
176  }
177 
179 
180  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
181  // Evenly distribute registers among MEMCPY operations to reduce register
182  // pressure.
// NextEmittedNumMemOps is the cumulative word count after this node; its
// difference from the previous cumulative count is this node's share.
183  unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
184  unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
185 
// Values 0, 1 and 2 of the new node become the Dst, Src and Chain used by
// the next iteration and by the trailing-byte code below.
186  Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
187  DAG.getConstant(NumRegs, dl, MVT::i32));
188  Src = Dst.getValue(1);
189  Chain = Dst.getValue(2);
190 
// Advance both pointer infos by the bytes this node covers (VTSize is
// still 4 at this point).
191  DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
192  SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
193 
194  EmittedNumMemOps = NextEmittedNumMemOps;
195  }
196 
// Nothing left over: the chain from the last MEMCPY node (or the incoming
// Chain if no node was emitted) is the result.
197  if (BytesLeft == 0)
198  return Chain;
199 
200  // Issue loads / stores for the trailing (1 - 3) bytes.
// Each step moves 2 bytes as i16 while at least 2 remain, else 1 byte as i8,
// so at most two loads and two stores are created.
201  auto getRemainingValueType = [](unsigned BytesLeft) {
202  return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
203  };
204  auto getRemainingSize = [](unsigned BytesLeft) {
205  return (BytesLeft >= 2) ? 2 : 1;
206  };
207 
// BytesLeft is consumed by the load loop and restored for the store loop.
208  unsigned BytesLeftSave = BytesLeft;
209  i = 0;
210  while (BytesLeft) {
211  VT = getRemainingValueType(BytesLeft);
212  VTSize = getRemainingSize(BytesLeft);
213  Loads[i] = DAG.getLoad(VT, dl, Chain,
214  DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215  DAG.getConstant(SrcOff, dl, MVT::i32)),
216  SrcPtrInfo.getWithOffset(SrcOff));
217  TFOps[i] = Loads[i].getValue(1);
218  ++i;
219  SrcOff += VTSize;
220  BytesLeft -= VTSize;
221  }
// Join the load chains so that every store below is chained after all loads.
222  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
223  makeArrayRef(TFOps, i));
224 
225  i = 0;
226  BytesLeft = BytesLeftSave;
227  while (BytesLeft) {
228  VT = getRemainingValueType(BytesLeft);
229  VTSize = getRemainingSize(BytesLeft);
230  TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
231  DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
232  DAG.getConstant(DstOff, dl, MVT::i32)),
233  DstPtrInfo.getWithOffset(DstOff));
234  ++i;
235  DstOff += VTSize;
236  BytesLeft -= VTSize;
237  }
// The returned token joins the chains of the trailing stores.
238  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
239  makeArrayRef(TFOps, i));
240 }
241 
// Target-specific memmove emission (the parameter list matches
// EmitTargetCodeForMemmove). No inline expansion is attempted: the call is
// forwarded unconditionally to EmitSpecializedLibcall with RTLIB::MEMMOVE and
// that result is returned as-is.
// isVolatile, DstPtrInfo and SrcPtrInfo are not referenced in the body.
// NOTE(review): the embedded line numbering of this listing skips 242, so the
// opening line of the signature is not visible here.
243  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
244  SDValue Size, unsigned Align, bool isVolatile,
245  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
246  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
247  RTLIB::MEMMOVE);
248 }
249 
// Target-specific memset emission (the parameter list matches
// EmitTargetCodeForMemset). No inline expansion is attempted: the call is
// forwarded unconditionally to EmitSpecializedLibcall with RTLIB::MEMSET and
// that result is returned as-is. Src is passed through in the position that
// EmitSpecializedLibcall treats as the fill value for RTLIB::MEMSET.
// isVolatile and DstPtrInfo are not referenced in the body.
// NOTE(review): the embedded line numbering of this listing skips 250, so the
// opening line of the signature is not visible here.
251  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
252  SDValue Size, unsigned Align, bool isVolatile,
253  MachinePointerInfo DstPtrInfo) const {
254  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
255  RTLIB::MEMSET);
256 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
EVT getValueType() const
Return the ValueType of the referenced return value.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:459
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
bool isThumb1Only() const
Definition: ARMSubtarget.h:673
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getExternalSymbol(const char *Sym, EVT VT)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
CallLoweringInfo & setChain(SDValue InChain)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:388
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:385
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:439
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:702
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
This structure contains all information that is necessary for lowering calls.
This class contains a discriminated union of information about pointers in memory operands...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:209
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
MachinePointerInfo getWithOffset(int64_t O) const
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:445
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
#define I(x, y, z)
Definition: MD5.cpp:58
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:527
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
SDValue getValue(unsigned R) const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static bool isVolatile(Instruction *Inst)
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:451
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
LLVMContext * getContext() const
Definition: SelectionDAG.h:393
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.