Line data Source code
1 : //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the ARMSelectionDAGInfo class.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "ARMTargetMachine.h"
15 : #include "llvm/CodeGen/SelectionDAG.h"
16 : #include "llvm/IR/DerivedTypes.h"
17 : using namespace llvm;
18 :
19 : #define DEBUG_TYPE "arm-selectiondag-info"
20 :
21 : // Emit, if possible, a specialized version of the given Libcall. Typically this
22 : // means selecting the appropriately aligned version, but we also convert memset
23 : // of 0 into memclr.
24 331 : SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
25 : SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
26 : SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
27 : const ARMSubtarget &Subtarget =
28 331 : DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
29 331 : const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
30 :
31 : // Only use a specialized AEABI function if the default version of this
32 : // Libcall is an AEABI function.
33 331 : if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
34 197 : return SDValue();
35 :
36 : // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
37 : // able to translate memset to memclr and use the value to index the function
38 : // name array.
39 : enum {
40 : AEABI_MEMCPY = 0,
41 : AEABI_MEMMOVE,
42 : AEABI_MEMSET,
43 : AEABI_MEMCLR
44 : } AEABILibcall;
45 134 : switch (LC) {
46 : case RTLIB::MEMCPY:
47 : AEABILibcall = AEABI_MEMCPY;
48 : break;
49 48 : case RTLIB::MEMMOVE:
50 : AEABILibcall = AEABI_MEMMOVE;
51 48 : break;
52 57 : case RTLIB::MEMSET:
53 : AEABILibcall = AEABI_MEMSET;
54 : if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
55 112 : if (ConstantSrc->getZExtValue() == 0)
56 : AEABILibcall = AEABI_MEMCLR;
57 : break;
58 0 : default:
59 0 : return SDValue();
60 : }
61 :
62 : // Choose the most-aligned libcall variant that we can
63 : enum {
64 : ALIGN1 = 0,
65 : ALIGN4,
66 : ALIGN8
67 : } AlignVariant;
68 134 : if ((Align & 7) == 0)
69 : AlignVariant = ALIGN8;
70 122 : else if ((Align & 3) == 0)
71 : AlignVariant = ALIGN4;
72 : else
73 : AlignVariant = ALIGN1;
74 :
75 : TargetLowering::ArgListTy Args;
76 : TargetLowering::ArgListEntry Entry;
77 134 : Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
78 134 : Entry.Node = Dst;
79 134 : Args.push_back(Entry);
80 134 : if (AEABILibcall == AEABI_MEMCLR) {
81 9 : Entry.Node = Size;
82 9 : Args.push_back(Entry);
83 125 : } else if (AEABILibcall == AEABI_MEMSET) {
84 : // Adjust parameters for memset, EABI uses format (ptr, size, value),
85 : // GNU library uses (ptr, value, size)
86 : // See RTABI section 4.3.4
87 48 : Entry.Node = Size;
88 48 : Args.push_back(Entry);
89 :
90 : // Extend or truncate the argument to be an i32 value for the call.
91 96 : if (Src.getValueType().bitsGT(MVT::i32))
92 0 : Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
93 96 : else if (Src.getValueType().bitsLT(MVT::i32))
94 48 : Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
95 :
96 48 : Entry.Node = Src;
97 48 : Entry.Ty = Type::getInt32Ty(*DAG.getContext());
98 48 : Entry.IsSExt = false;
99 48 : Args.push_back(Entry);
100 : } else {
101 77 : Entry.Node = Src;
102 77 : Args.push_back(Entry);
103 :
104 77 : Entry.Node = Size;
105 77 : Args.push_back(Entry);
106 : }
107 :
108 134 : char const *FunctionNames[4][3] = {
109 : { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
110 : { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
111 : { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
112 : { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
113 : };
114 134 : TargetLowering::CallLoweringInfo CLI(DAG);
115 : CLI.setDebugLoc(dl)
116 134 : .setChain(Chain)
117 : .setLibCallee(
118 134 : TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
119 : DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
120 : TLI->getPointerTy(DAG.getDataLayout())),
121 268 : std::move(Args))
122 : .setDiscardResult();
123 134 : std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
124 :
125 134 : return CallResult.second;
126 : }
127 :
128 289 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
129 : SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
130 : SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
131 : MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
132 : const ARMSubtarget &Subtarget =
133 289 : DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
134 : // Do repeated 4-byte loads and stores. To be improved.
135 : // This requires 4-byte alignment.
136 289 : if ((Align & 3) != 0)
137 190 : return SDValue();
138 : // This requires the copy size to be a constant, preferably
139 : // within a subtarget-specific limit.
140 : ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
141 : if (!ConstantSize)
142 : return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
143 0 : RTLIB::MEMCPY);
144 99 : uint64_t SizeVal = ConstantSize->getZExtValue();
145 99 : if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
146 : return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
147 63 : RTLIB::MEMCPY);
148 :
149 36 : unsigned BytesLeft = SizeVal & 3;
150 36 : unsigned NumMemOps = SizeVal >> 2;
151 : unsigned EmittedNumMemOps = 0;
152 36 : EVT VT = MVT::i32;
153 : unsigned VTSize = 4;
154 : unsigned i = 0;
155 : // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
156 36 : const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
157 36 : SDValue TFOps[6];
158 36 : SDValue Loads[6];
159 : uint64_t SrcOff = 0, DstOff = 0;
160 :
161 : // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
162 : // VLDM/VSTM and make this code emit it when appropriate. This would reduce
163 : // pressure on the general purpose registers. However this seems harder to map
164 : // onto the register allocator's view of the world.
165 :
166 : // The number of MEMCPY pseudo-instructions to emit. We use up to
167 : // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
168 : // later on. This is a lower bound on the number of MEMCPY operations we must
169 : // emit.
170 36 : unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
171 :
172 : // Code size optimisation: do not inline memcpy if expansion results in
173 : // more instructions than the libary call.
174 36 : if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
175 1 : return SDValue();
176 : }
177 :
178 35 : SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
179 :
180 91 : for (unsigned I = 0; I != NumMEMCPYs; ++I) {
181 : // Evenly distribute registers among MEMCPY operations to reduce register
182 : // pressure.
183 56 : unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
184 56 : unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
185 :
186 56 : Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
187 56 : DAG.getConstant(NumRegs, dl, MVT::i32));
188 56 : Src = Dst.getValue(1);
189 56 : Chain = Dst.getValue(2);
190 :
191 56 : DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
192 56 : SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
193 :
194 : EmittedNumMemOps = NextEmittedNumMemOps;
195 : }
196 :
197 35 : if (BytesLeft == 0)
198 26 : return Chain;
199 :
200 : // Issue loads / stores for the trailing (1 - 3) bytes.
201 : auto getRemainingValueType = [](unsigned BytesLeft) {
202 13 : return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
203 : };
204 : auto getRemainingSize = [](unsigned BytesLeft) {
205 13 : return (BytesLeft >= 2) ? 2 : 1;
206 : };
207 :
208 : unsigned BytesLeftSave = BytesLeft;
209 : i = 0;
210 22 : while (BytesLeft) {
211 13 : VT = getRemainingValueType(BytesLeft);
212 13 : VTSize = getRemainingSize(BytesLeft);
213 13 : Loads[i] = DAG.getLoad(VT, dl, Chain,
214 : DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215 : DAG.getConstant(SrcOff, dl, MVT::i32)),
216 13 : SrcPtrInfo.getWithOffset(SrcOff));
217 13 : TFOps[i] = Loads[i].getValue(1);
218 13 : ++i;
219 13 : SrcOff += VTSize;
220 13 : BytesLeft -= VTSize;
221 : }
222 9 : Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
223 18 : makeArrayRef(TFOps, i));
224 :
225 : i = 0;
226 : BytesLeft = BytesLeftSave;
227 22 : while (BytesLeft) {
228 : VT = getRemainingValueType(BytesLeft);
229 13 : VTSize = getRemainingSize(BytesLeft);
230 13 : TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
231 : DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
232 : DAG.getConstant(DstOff, dl, MVT::i32)),
233 13 : DstPtrInfo.getWithOffset(DstOff));
234 13 : ++i;
235 13 : DstOff += VTSize;
236 13 : BytesLeft -= VTSize;
237 : }
238 : return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
239 18 : makeArrayRef(TFOps, i));
240 : }
241 :
242 120 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
243 : SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
244 : SDValue Size, unsigned Align, bool isVolatile,
245 : MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
246 : return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
247 120 : RTLIB::MEMMOVE);
248 : }
249 :
250 148 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
251 : SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
252 : SDValue Size, unsigned Align, bool isVolatile,
253 : MachinePointerInfo DstPtrInfo) const {
254 : return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
255 148 : RTLIB::MEMSET);
256 : }
|