File: build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp
Warning: line 5093, column 7: 1st function call argument is an uninitialized value
1 | //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 | //
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 | // See https://llvm.org/LICENSE.txt for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 | //
7 | //===----------------------------------------------------------------------===//
8 | //
9 | // This file defines the interfaces that ARM uses to lower LLVM code into a
10 | // selection DAG.
11 | //
12 | //===----------------------------------------------------------------------===//
13 |
14 | #include "ARMISelLowering.h"
15 | #include "ARMBaseInstrInfo.h"
16 | #include "ARMBaseRegisterInfo.h"
17 | #include "ARMCallingConv.h"
18 | #include "ARMConstantPoolValue.h"
19 | #include "ARMMachineFunctionInfo.h"
20 | #include "ARMPerfectShuffle.h"
21 | #include "ARMRegisterInfo.h"
22 | #include "ARMSelectionDAGInfo.h"
23 | #include "ARMSubtarget.h"
24 | #include "ARMTargetTransformInfo.h"
25 | #include "MCTargetDesc/ARMAddressingModes.h"
26 | #include "MCTargetDesc/ARMBaseInfo.h"
27 | #include "Utils/ARMBaseInfo.h"
28 | #include "llvm/ADT/APFloat.h"
29 | #include "llvm/ADT/APInt.h"
30 | #include "llvm/ADT/ArrayRef.h"
31 | #include "llvm/ADT/BitVector.h"
32 | #include "llvm/ADT/DenseMap.h"
33 | #include "llvm/ADT/STLExtras.h"
34 | #include "llvm/ADT/SmallPtrSet.h"
35 | #include "llvm/ADT/SmallVector.h"
36 | #include "llvm/ADT/Statistic.h"
37 | #include "llvm/ADT/StringExtras.h"
38 | #include "llvm/ADT/StringRef.h"
39 | #include "llvm/ADT/StringSwitch.h"
40 | #include "llvm/ADT/Twine.h"
41 | #include "llvm/Analysis/VectorUtils.h"
42 | #include "llvm/CodeGen/CallingConvLower.h"
43 | #include "llvm/CodeGen/ISDOpcodes.h"
44 | #include "llvm/CodeGen/IntrinsicLowering.h"
45 | #include "llvm/CodeGen/MachineBasicBlock.h"
46 | #include "llvm/CodeGen/MachineConstantPool.h"
47 | #include "llvm/CodeGen/MachineFrameInfo.h"
48 | #include "llvm/CodeGen/MachineFunction.h"
49 | #include "llvm/CodeGen/MachineInstr.h"
50 | #include "llvm/CodeGen/MachineInstrBuilder.h"
51 | #include "llvm/CodeGen/MachineJumpTableInfo.h"
52 | #include "llvm/CodeGen/MachineMemOperand.h"
53 | #include "llvm/CodeGen/MachineOperand.h"
54 | #include "llvm/CodeGen/MachineRegisterInfo.h"
55 | #include "llvm/CodeGen/RuntimeLibcalls.h"
56 | #include "llvm/CodeGen/SelectionDAG.h"
57 | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
58 | #include "llvm/CodeGen/SelectionDAGNodes.h"
59 | #include "llvm/CodeGen/TargetInstrInfo.h"
60 | #include "llvm/CodeGen/TargetLowering.h"
61 | #include "llvm/CodeGen/TargetOpcodes.h"
62 | #include "llvm/CodeGen/TargetRegisterInfo.h"
63 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
64 | #include "llvm/CodeGen/ValueTypes.h"
65 | #include "llvm/IR/Attributes.h"
66 | #include "llvm/IR/CallingConv.h"
67 | #include "llvm/IR/Constant.h"
68 | #include "llvm/IR/Constants.h"
69 | #include "llvm/IR/DataLayout.h"
70 | #include "llvm/IR/DebugLoc.h"
71 | #include "llvm/IR/DerivedTypes.h"
72 | #include "llvm/IR/Function.h"
73 | #include "llvm/IR/GlobalAlias.h"
74 | #include "llvm/IR/GlobalValue.h"
75 | #include "llvm/IR/GlobalVariable.h"
76 | #include "llvm/IR/IRBuilder.h"
77 | #include "llvm/IR/InlineAsm.h"
78 | #include "llvm/IR/Instruction.h"
79 | #include "llvm/IR/Instructions.h"
80 | #include "llvm/IR/IntrinsicInst.h"
81 | #include "llvm/IR/Intrinsics.h"
82 | #include "llvm/IR/IntrinsicsARM.h"
83 | #include "llvm/IR/Module.h"
84 | #include "llvm/IR/PatternMatch.h"
85 | #include "llvm/IR/Type.h"
86 | #include "llvm/IR/User.h"
87 | #include "llvm/IR/Value.h"
88 | #include "llvm/MC/MCInstrDesc.h"
89 | #include "llvm/MC/MCInstrItineraries.h"
90 | #include "llvm/MC/MCRegisterInfo.h"
91 | #include "llvm/MC/MCSchedule.h"
92 | #include "llvm/Support/AtomicOrdering.h"
93 | #include "llvm/Support/BranchProbability.h"
94 | #include "llvm/Support/Casting.h"
95 | #include "llvm/Support/CodeGen.h"
96 | #include "llvm/Support/CommandLine.h"
97 | #include "llvm/Support/Compiler.h"
98 | #include "llvm/Support/Debug.h"
99 | #include "llvm/Support/ErrorHandling.h"
100 | #include "llvm/Support/KnownBits.h"
101 | #include "llvm/Support/MachineValueType.h"
102 | #include "llvm/Support/MathExtras.h"
103 | #include "llvm/Support/raw_ostream.h"
104 | #include "llvm/Target/TargetMachine.h"
105 | #include "llvm/Target/TargetOptions.h"
106 | #include "llvm/TargetParser/Triple.h"
107 | #include <algorithm>
108 | #include <cassert>
109 | #include <cstdint>
110 | #include <cstdlib>
111 | #include <iterator>
112 | #include <limits>
113 | #include <optional>
114 | #include <string>
115 | #include <tuple>
116 | #include <utility>
117 | #include <vector>
118 |
119 | using namespace llvm;
120 | using namespace llvm::PatternMatch;
121 |
122 | #define DEBUG_TYPE "arm-isel"
123 |
124 | STATISTIC(NumTailCalls, "Number of tail calls");
125 | STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126 | STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127 | STATISTIC(NumConstpoolPromoted,
128 |           "Number of constants with their storage promoted into constant pools");
129 |
130 | static cl::opt<bool>
131 | ARMInterworking("arm-interworking", cl::Hidden,
132 |   cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 |   cl::init(true));
134 |
135 | static cl::opt<bool> EnableConstpoolPromotion(
136 |     "arm-promote-constant", cl::Hidden,
137 |     cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 |              "constant pools"),
139 |     cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
140 | static cl::opt<unsigned> ConstpoolPromotionMaxSize(
141 |     "arm-promote-constant-max-size", cl::Hidden,
142 |     cl::desc("Maximum size of constant to promote into a constant pool"),
143 |     cl::init(64));
144 | static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
145 |     "arm-promote-constant-max-total", cl::Hidden,
146 |     cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 |     cl::init(128));
148 |
149 | cl::opt<unsigned>
150 | MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 |   cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 |   cl::init(2));
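
These cl::opt declarations register hidden LLVM command-line flags; hidden options parse normally, they are merely omitted from --help. Assuming standard cl::opt handling, they would be toggled like this (illustrative invocations, not taken from the listing):

    // llc -mtriple=armv7-none-eabi -arm-promote-constant input.ll
    // clang --target=armv7-none-eabi -mllvm -arm-promote-constant=1 input.c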
153 |
154 | // The APCS parameter registers.
155 | static const MCPhysReg GPRArgRegs[] = {
156 |   ARM::R0, ARM::R1, ARM::R2, ARM::R3
157 | };
158 |
159 | void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160 |   if (VT != PromotedLdStVT) {
161 |     setOperationAction(ISD::LOAD, VT, Promote);
162 |     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163 |
164 |     setOperationAction(ISD::STORE, VT, Promote);
165 |     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166 |   }
167 |
168 |   MVT ElemTy = VT.getVectorElementType();
169 |   if (ElemTy != MVT::f64)
170 |     setOperationAction(ISD::SETCC, VT, Custom);
171 |   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
172 |   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
173 |   if (ElemTy == MVT::i32) {
174 |     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
175 |     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
176 |     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
177 |     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
178 |   } else {
179 |     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
180 |     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
181 |     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
182 |     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
183 |   }
184 |   setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
185 |   setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
186 |   setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
187 |   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
188 |   setOperationAction(ISD::SELECT, VT, Expand);
189 |   setOperationAction(ISD::SELECT_CC, VT, Expand);
190 |   setOperationAction(ISD::VSELECT, VT, Expand);
191 |   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
192 |   if (VT.isInteger()) {
193 |     setOperationAction(ISD::SHL, VT, Custom);
194 |     setOperationAction(ISD::SRA, VT, Custom);
195 |     setOperationAction(ISD::SRL, VT, Custom);
196 |   }
197 |
198 |   // Neon does not support vector divide/remainder operations.
199 |   setOperationAction(ISD::SDIV, VT, Expand);
200 |   setOperationAction(ISD::UDIV, VT, Expand);
201 |   setOperationAction(ISD::FDIV, VT, Expand);
202 |   setOperationAction(ISD::SREM, VT, Expand);
203 |   setOperationAction(ISD::UREM, VT, Expand);
204 |   setOperationAction(ISD::FREM, VT, Expand);
205 |   setOperationAction(ISD::SDIVREM, VT, Expand);
206 |   setOperationAction(ISD::UDIVREM, VT, Expand);
207 |
208 |   if (!VT.isFloatingPoint() &&
209 |       VT != MVT::v2i64 && VT != MVT::v1i64)
210 |     for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211 |       setOperationAction(Opcode, VT, Legal);
212 |   if (!VT.isFloatingPoint())
213 |     for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214 |       setOperationAction(Opcode, VT, Legal);
215 | }
216 |
217 | void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218 |   addRegisterClass(VT, &ARM::DPRRegClass);
219 |   addTypeForNEON(VT, MVT::f64);
220 | }
221 |
222 | void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223 |   addRegisterClass(VT, &ARM::DPairRegClass);
224 |   addTypeForNEON(VT, MVT::v2f64);
225 | }
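
The two helpers above encode the NEON register mapping: a 64-bit vector type lives in a D register (DPRRegClass) with loads/stores promoted to f64, while a 128-bit type lives in a Q register, modeled here as a pair of D registers (DPairRegClass), with loads/stores promoted to v2f64. A condensed sketch of the pattern they wrap (illustrative types):

    // 64-bit vector  -> D register, loads/stores carried out as f64
    addRegisterClass(MVT::v2i32, &ARM::DPRRegClass);
    // 128-bit vector -> Q register (a D-register pair), loads/stores as v2f64
    addRegisterClass(MVT::v4i32, &ARM::DPairRegClass);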
226 |
227 | void ARMTargetLowering::setAllExpand(MVT VT) {
228 |   for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229 |     setOperationAction(Opc, VT, Expand);
230 |
231 |   // We support these really simple operations even on types where all
232 |   // the actual arithmetic has to be broken down into simpler
233 |   // operations or turned into library calls.
234 |   setOperationAction(ISD::BITCAST, VT, Legal);
235 |   setOperationAction(ISD::LOAD, VT, Legal);
236 |   setOperationAction(ISD::STORE, VT, Legal);
237 |   setOperationAction(ISD::UNDEF, VT, Legal);
238 | }
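
For orientation, a hedged summary of the LegalizeAction values used throughout this file: Legal means the target handles the node natively; Promote legalizes by switching to a wider or different type first; Expand breaks the node into simpler nodes or a libcall; Custom routes it through the target's LowerOperation hook. So setAllExpand blankets every opcode with Expand, then restores the four bit-pattern-agnostic operations:

    setOperationAction(ISD::FADD, MVT::f64, Expand);     // split up or libcall
    setOperationAction(ISD::SETCC, MVT::v4i32, Custom);  // LowerOperation hook
    setOperationAction(ISD::BITCAST, MVT::v2f64, Legal); // handled natively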
239 |
240 | void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241 |                                        LegalizeAction Action) {
242 |   setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243 |   setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244 |   setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245 | }
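
addAllExtLoads covers the three extending-load flavors in one shot: EXTLOAD (any-extend, upper bits unspecified), ZEXTLOAD (zero-extend) and SEXTLOAD (sign-extend). In setLoadExtAction the first MVT is the in-register result type and the second the narrower memory type, so for example:

    // A v4i8 in memory may be sign-extend-loaded straight into a v4i32
    // register when marked Legal:
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);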
246 |
247 | void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248 |   const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249 |
250 |   for (auto VT : IntTypes) {
251 |     addRegisterClass(VT, &ARM::MQPRRegClass);
252 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
253 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
254 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
255 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
256 |     setOperationAction(ISD::SHL, VT, Custom);
257 |     setOperationAction(ISD::SRA, VT, Custom);
258 |     setOperationAction(ISD::SRL, VT, Custom);
259 |     setOperationAction(ISD::SMIN, VT, Legal);
260 |     setOperationAction(ISD::SMAX, VT, Legal);
261 |     setOperationAction(ISD::UMIN, VT, Legal);
262 |     setOperationAction(ISD::UMAX, VT, Legal);
263 |     setOperationAction(ISD::ABS, VT, Legal);
264 |     setOperationAction(ISD::SETCC, VT, Custom);
265 |     setOperationAction(ISD::MLOAD, VT, Custom);
266 |     setOperationAction(ISD::MSTORE, VT, Legal);
267 |     setOperationAction(ISD::CTLZ, VT, Legal);
268 |     setOperationAction(ISD::CTTZ, VT, Custom);
269 |     setOperationAction(ISD::BITREVERSE, VT, Legal);
270 |     setOperationAction(ISD::BSWAP, VT, Legal);
271 |     setOperationAction(ISD::SADDSAT, VT, Legal);
272 |     setOperationAction(ISD::UADDSAT, VT, Legal);
273 |     setOperationAction(ISD::SSUBSAT, VT, Legal);
274 |     setOperationAction(ISD::USUBSAT, VT, Legal);
275 |     setOperationAction(ISD::ABDS, VT, Legal);
276 |     setOperationAction(ISD::ABDU, VT, Legal);
277 |     setOperationAction(ISD::AVGFLOORS, VT, Legal);
278 |     setOperationAction(ISD::AVGFLOORU, VT, Legal);
279 |     setOperationAction(ISD::AVGCEILS, VT, Legal);
280 |     setOperationAction(ISD::AVGCEILU, VT, Legal);
281 |
282 |     // No native support for these.
283 |     setOperationAction(ISD::UDIV, VT, Expand);
284 |     setOperationAction(ISD::SDIV, VT, Expand);
285 |     setOperationAction(ISD::UREM, VT, Expand);
286 |     setOperationAction(ISD::SREM, VT, Expand);
287 |     setOperationAction(ISD::UDIVREM, VT, Expand);
288 |     setOperationAction(ISD::SDIVREM, VT, Expand);
289 |     setOperationAction(ISD::CTPOP, VT, Expand);
290 |     setOperationAction(ISD::SELECT, VT, Expand);
291 |     setOperationAction(ISD::SELECT_CC, VT, Expand);
292 |
293 |     // Vector reductions
294 |     setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
295 |     setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
296 |     setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
297 |     setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
298 |     setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
299 |     setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
300 |     setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
301 |     setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
302 |     setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
303 |
304 |     if (!HasMVEFP) {
305 |       setOperationAction(ISD::SINT_TO_FP, VT, Expand);
306 |       setOperationAction(ISD::UINT_TO_FP, VT, Expand);
307 |       setOperationAction(ISD::FP_TO_SINT, VT, Expand);
308 |       setOperationAction(ISD::FP_TO_UINT, VT, Expand);
309 |     } else {
310 |       setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
311 |       setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
312 |     }
313 |
314 |     // Pre and Post inc are supported on loads and stores
315 |     for (unsigned im = (unsigned)ISD::PRE_INC;
316 |          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317 |       setIndexedLoadAction(im, VT, Legal);
318 |       setIndexedStoreAction(im, VT, Legal);
319 |       setIndexedMaskedLoadAction(im, VT, Legal);
320 |       setIndexedMaskedStoreAction(im, VT, Legal);
321 |     }
322 |   }
323 |
324 |   const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 |   for (auto VT : FloatTypes) {
326 |     addRegisterClass(VT, &ARM::MQPRRegClass);
327 |     if (!HasMVEFP)
328 |       setAllExpand(VT);
329 |
330 |     // These are legal or custom whether we have MVE.fp or not
331 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336 |     setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337 |     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338 |     setOperationAction(ISD::SETCC, VT, Custom);
339 |     setOperationAction(ISD::MLOAD, VT, Custom);
340 |     setOperationAction(ISD::MSTORE, VT, Legal);
341 |     setOperationAction(ISD::SELECT, VT, Expand);
342 |     setOperationAction(ISD::SELECT_CC, VT, Expand);
343 |
344 |     // Pre and Post inc are supported on loads and stores
345 |     for (unsigned im = (unsigned)ISD::PRE_INC;
346 |          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347 |       setIndexedLoadAction(im, VT, Legal);
348 |       setIndexedStoreAction(im, VT, Legal);
349 |       setIndexedMaskedLoadAction(im, VT, Legal);
350 |       setIndexedMaskedStoreAction(im, VT, Legal);
351 |     }
352 |
353 |     if (HasMVEFP) {
354 |       setOperationAction(ISD::FMINNUM, VT, Legal);
355 |       setOperationAction(ISD::FMAXNUM, VT, Legal);
356 |       setOperationAction(ISD::FROUND, VT, Legal);
357 |       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358 |       setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359 |       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360 |       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361 |
362 |       // No native support for these.
363 |       setOperationAction(ISD::FDIV, VT, Expand);
364 |       setOperationAction(ISD::FREM, VT, Expand);
365 |       setOperationAction(ISD::FSQRT, VT, Expand);
366 |       setOperationAction(ISD::FSIN, VT, Expand);
367 |       setOperationAction(ISD::FCOS, VT, Expand);
368 |       setOperationAction(ISD::FPOW, VT, Expand);
369 |       setOperationAction(ISD::FLOG, VT, Expand);
370 |       setOperationAction(ISD::FLOG2, VT, Expand);
371 |       setOperationAction(ISD::FLOG10, VT, Expand);
372 |       setOperationAction(ISD::FEXP, VT, Expand);
373 |       setOperationAction(ISD::FEXP2, VT, Expand);
374 |       setOperationAction(ISD::FNEARBYINT, VT, Expand);
375 |     }
376 |   }
377 |
378 |   // Custom expand smaller-than-legal vector reductions to prevent false zero
379 |   // items being added.
380 |   setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381 |   setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382 |   setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383 |   setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384 |   setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385 |   setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386 |   setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387 |   setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388 |
389 |   // We 'support' these types up to bitcast/load/store level, regardless of
390 |   // MVE integer-only / float support. FP data processing on the FP vector
391 |   // types is only inhibited at the integer-only level.
392 |   const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393 |   for (auto VT : LongTypes) {
394 |     addRegisterClass(VT, &ARM::MQPRRegClass);
395 |     setAllExpand(VT);
396 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399 |     setOperationAction(ISD::VSELECT, VT, Legal);
400 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
401 |   }
402 |   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
403 |
404 |   // We can do bitwise operations on v2i64 vectors
405 |   setOperationAction(ISD::AND, MVT::v2i64, Legal);
406 |   setOperationAction(ISD::OR, MVT::v2i64, Legal);
407 |   setOperationAction(ISD::XOR, MVT::v2i64, Legal);
408 |
409 |   // It is legal to extload from v4i8 to v4i16 or v4i32.
410 |   addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
411 |   addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
412 |   addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
413 |
414 |   // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
415 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
416 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
417 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
418 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
419 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
420 |
421 |   // Some truncating stores are legal too.
422 |   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
423 |   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
424 |   setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
425 |
426 |   // Pre and Post inc on these are legal, given the correct extends
427 |   for (unsigned im = (unsigned)ISD::PRE_INC;
428 |        im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
429 |     for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
430 |       setIndexedLoadAction(im, VT, Legal);
431 |       setIndexedStoreAction(im, VT, Legal);
432 |       setIndexedMaskedLoadAction(im, VT, Legal);
433 |       setIndexedMaskedStoreAction(im, VT, Legal);
434 |     }
435 |   }
436 |
437 |   // Predicate types
438 |   const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
439 |   for (auto VT : pTypes) {
440 |     addRegisterClass(VT, &ARM::VCCRRegClass);
441 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
442 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
443 |     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
444 |     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
445 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
446 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
447 |     setOperationAction(ISD::SETCC, VT, Custom);
448 |     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
449 |     setOperationAction(ISD::LOAD, VT, Custom);
450 |     setOperationAction(ISD::STORE, VT, Custom);
451 |     setOperationAction(ISD::TRUNCATE, VT, Custom);
452 |     setOperationAction(ISD::VSELECT, VT, Expand);
453 |     setOperationAction(ISD::SELECT, VT, Expand);
454 |     setOperationAction(ISD::SELECT_CC, VT, Expand);
455 |
456 |     if (!HasMVEFP) {
457 |       setOperationAction(ISD::SINT_TO_FP, VT, Expand);
458 |       setOperationAction(ISD::UINT_TO_FP, VT, Expand);
459 |       setOperationAction(ISD::FP_TO_SINT, VT, Expand);
460 |       setOperationAction(ISD::FP_TO_UINT, VT, Expand);
461 |     }
462 |   }
463 |   setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
464 |   setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
465 |   setOperationAction(ISD::AND, MVT::v2i1, Expand);
466 |   setOperationAction(ISD::OR, MVT::v2i1, Expand);
467 |   setOperationAction(ISD::XOR, MVT::v2i1, Expand);
468 |   setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
469 |   setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
470 |   setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
471 |   setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
472 |
473 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
474 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
475 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
476 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
477 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
478 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
479 |   setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
480 |   setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
481 | }
482 |
483 | ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
484 |                                      const ARMSubtarget &STI)
485 |     : TargetLowering(TM), Subtarget(&STI) {
486 |   RegInfo = Subtarget->getRegisterInfo();
487 |   Itins = Subtarget->getInstrItineraryData();
488 |
489 |   setBooleanContents(ZeroOrOneBooleanContent);
490 |   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
491 |
492 |   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
493 |       !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
494 |     bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
495 |     for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
496 |       setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
497 |                             IsHFTarget ? CallingConv::ARM_AAPCS_VFP
498 |                                        : CallingConv::ARM_AAPCS);
499 |   }
500 |
501 |   if (Subtarget->isTargetMachO()) {
502 |     // Uses VFP for Thumb libfuncs if available.
503 |     if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
504 |         Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
505 |       static const struct {
506 |         const RTLIB::Libcall Op;
507 |         const char * const Name;
508 |         const ISD::CondCode Cond;
509 |       } LibraryCalls[] = {
510 |         // Single-precision floating-point arithmetic.
511 |         { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
512 |         { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
513 |         { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
514 |         { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
515 |
516 |         // Double-precision floating-point arithmetic.
517 |         { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
518 |         { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
519 |         { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
520 |         { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
521 |
522 |         // Single-precision comparisons.
523 |         { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
524 |         { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
525 |         { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
526 |         { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
527 |         { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
528 |         { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
529 |         { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
530 |
531 |         // Double-precision comparisons.
532 |         { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
533 |         { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
534 |         { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
535 |         { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
536 |         { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
537 |         { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
538 |         { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
539 |
540 |         // Floating-point to integer conversions.
541 |         // i64 conversions are done via library routines even when generating VFP
542 |         // instructions, so use the same ones.
543 |         { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
544 |         { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
545 |         { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
546 |         { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
547 |
548 |         // Conversions between floating types.
549 |         { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
550 |         { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
551 |
552 |         // Integer to floating-point conversions.
553 |         // i64 conversions are done via library routines even when generating VFP
554 |         // instructions, so use the same ones.
555 |         // FIXME: There appears to be some naming inconsistency in ARM libgcc:
556 |         // e.g., __floatunsidf vs. __floatunssidfvfp.
557 |         { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
558 |         { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
559 |         { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
560 |         { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
561 |       };
562 |
563 |       for (const auto &LC : LibraryCalls) {
564 |         setLibcallName(LC.Op, LC.Name);
565 |         if (LC.Cond != ISD::SETCC_INVALID)
566 |           setCmpLibcallCC(LC.Op, LC.Cond);
567 |       }
568 |     }
569 |   }
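
This table-plus-loop idiom recurs for every libcall family below: setLibcallName renames the runtime helper, and for comparison helpers setCmpLibcallCC records how the helper's integer result is tested against zero. A minimal sketch of the shape, with hypothetical entries:

    static const struct {
      RTLIB::Libcall Op;
      const char *Name;
      ISD::CondCode Cond; // SETCC_INVALID for non-comparisons
    } Calls[] = {
        {RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID},
        {RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE}, // (result != 0) => equal
    };
    for (const auto &LC : Calls) {
      setLibcallName(LC.Op, LC.Name);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }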
570 |
571 |   // These libcalls are not available in 32-bit.
572 |   setLibcallName(RTLIB::SHL_I128, nullptr);
573 |   setLibcallName(RTLIB::SRL_I128, nullptr);
574 |   setLibcallName(RTLIB::SRA_I128, nullptr);
575 |   setLibcallName(RTLIB::MUL_I128, nullptr);
576 |   setLibcallName(RTLIB::MULO_I64, nullptr);
577 |   setLibcallName(RTLIB::MULO_I128, nullptr);
578 |
579 |   // RTLIB
580 |   if (Subtarget->isAAPCS_ABI() &&
581 |       (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
582 |        Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
583 |     static const struct {
584 |       const RTLIB::Libcall Op;
585 |       const char * const Name;
586 |       const CallingConv::ID CC;
587 |       const ISD::CondCode Cond;
588 |     } LibraryCalls[] = {
589 |       // Double-precision floating-point arithmetic helper functions
590 |       // RTABI chapter 4.1.2, Table 2
591 |       { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592 |       { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 |       { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594 |       { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 |
596 |       // Double-precision floating-point comparison helper functions
597 |       // RTABI chapter 4.1.2, Table 3
598 |       { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
599 |       { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
600 |       { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
601 |       { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
602 |       { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
603 |       { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
604 |       { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
605 |
606 |       // Single-precision floating-point arithmetic helper functions
607 |       // RTABI chapter 4.1.2, Table 4
608 |       { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609 |       { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 |       { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611 |       { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 |
613 |       // Single-precision floating-point comparison helper functions
614 |       // RTABI chapter 4.1.2, Table 5
615 |       { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
616 |       { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
617 |       { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
618 |       { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
619 |       { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
620 |       { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
621 |       { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
622 |
623 |       // Floating-point to integer conversions.
624 |       // RTABI chapter 4.1.2, Table 6
625 |       { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626 |       { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 |       { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 |       { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 |       { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 |       { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 |       { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 |       { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 |
634 |       // Conversions between floating types.
635 |       // RTABI chapter 4.1.2, Table 7
636 |       { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 |       { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 |       { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 |
640 |       // Integer to floating-point conversions.
641 |       // RTABI chapter 4.1.2, Table 8
642 |       { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 |       { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 |       { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 |       { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 |       { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 |       { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 |       { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 |       { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 |
651 |       // Long long helper functions
652 |       // RTABI chapter 4.2, Table 9
653 |       { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 |       { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 |       { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 |       { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 |
658 |       // Integer division functions
659 |       // RTABI chapter 4.3.1
660 |       { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 |       { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 |       { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 |       { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 |       { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 |       { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 |       { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 |       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 |     };
669 |
670 |     for (const auto &LC : LibraryCalls) {
671 |       setLibcallName(LC.Op, LC.Name);
672 |       setLibcallCallingConv(LC.Op, LC.CC);
673 |       if (LC.Cond != ISD::SETCC_INVALID)
674 |         setCmpLibcallCC(LC.Op, LC.Cond);
675 |     }
676 |
677 |     // EABI dependent RTLIB
678 |     if (TM.Options.EABIVersion == EABI::EABI4 ||
679 |         TM.Options.EABIVersion == EABI::EABI5) {
680 |       static const struct {
681 |         const RTLIB::Libcall Op;
682 |         const char *const Name;
683 |         const CallingConv::ID CC;
684 |         const ISD::CondCode Cond;
685 |       } MemOpsLibraryCalls[] = {
686 |         // Memory operations
687 |         // RTABI chapter 4.3.4
688 |         { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
689 |         { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 |         { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691 |       };
692 |
693 |       for (const auto &LC : MemOpsLibraryCalls) {
694 |         setLibcallName(LC.Op, LC.Name);
695 |         setLibcallCallingConv(LC.Op, LC.CC);
696 |         if (LC.Cond != ISD::SETCC_INVALID)
697 |           setCmpLibcallCC(LC.Op, LC.Cond);
698 |       }
699 |     }
700 |   }
701 |
702 |   if (Subtarget->isTargetWindows()) {
703 |     static const struct {
704 |       const RTLIB::Libcall Op;
705 |       const char * const Name;
706 |       const CallingConv::ID CC;
707 |     } LibraryCalls[] = {
708 |       { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
709 |       { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
710 |       { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
711 |       { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
712 |       { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
713 |       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
714 |       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
715 |       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
716 |     };
717 |
718 |     for (const auto &LC : LibraryCalls) {
719 |       setLibcallName(LC.Op, LC.Name);
720 |       setLibcallCallingConv(LC.Op, LC.CC);
721 |     }
722 |   }
723 |
724 |   // Use divmod compiler-rt calls for iOS 5.0 and later.
725 |   if (Subtarget->isTargetMachO() &&
726 |       !(Subtarget->isTargetIOS() &&
727 |         Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
728 |     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
729 |     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
730 |   }
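
For context on the two names just installed: these combined helpers produce quotient and remainder in a single call. Assuming the usual compiler-rt prototypes (an assumption about the runtime library, not something this listing states):

    extern "C" int __divmodsi4(int a, int b, int *rem);  // returns a/b, stores a%b in *rem
    extern "C" unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem);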
731 |
732 |   // The half <-> float conversion functions are always soft-float on
733 |   // non-watchOS platforms, but are needed for some targets which use a
734 |   // hard-float calling convention by default.
735 |   if (!Subtarget->isTargetWatchABI()) {
736 |     if (Subtarget->isAAPCS_ABI()) {
737 |       setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
738 |       setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
739 |       setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
740 |     } else {
741 |       setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
742 |       setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
743 |       setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
744 |     }
745 |   }
746 |
747 |   // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
748 |   // a __gnu_ prefix (which is the default).
749 |   if (Subtarget->isTargetAEABI()) {
750 |     static const struct {
751 |       const RTLIB::Libcall Op;
752 |       const char * const Name;
753 |       const CallingConv::ID CC;
754 |     } LibraryCalls[] = {
755 |       { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
756 |       { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
757 |       { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
758 |     };
759 |
760 |     for (const auto &LC : LibraryCalls) {
761 |       setLibcallName(LC.Op, LC.Name);
762 |       setLibcallCallingConv(LC.Op, LC.CC);
763 |     }
764 |   }
765 |
766 |   if (Subtarget->isThumb1Only())
767 |     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
768 |   else
769 |     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
770 |
771 |   if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
772 |       Subtarget->hasFPRegs()) {
773 |     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
774 |     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
775 |
776 |     setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
777 |     setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
778 |     setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
779 |     setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
780 |
781 |     if (!Subtarget->hasVFP2Base())
782 |       setAllExpand(MVT::f32);
783 |     if (!Subtarget->hasFP64())
784 |       setAllExpand(MVT::f64);
785 |   }
786 |
787 |   if (Subtarget->hasFullFP16()) {
788 |     addRegisterClass(MVT::f16, &ARM::HPRRegClass);
789 |     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
790 |     setOperationAction(ISD::BITCAST, MVT::f16, Custom);
791 |
792 |     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
793 |     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
794 |   }
795 |
796 |   if (Subtarget->hasBF16()) {
797 |     addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
798 |     setAllExpand(MVT::bf16);
799 |     if (!Subtarget->hasFullFP16())
800 |       setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
801 |   }
802 |
803 |   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
804 |     for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
805 |       setTruncStoreAction(VT, InnerVT, Expand);
806 |       addAllExtLoads(VT, InnerVT, Expand);
807 |     }
808 |
809 |     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
810 |     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
811 |
812 |     setOperationAction(ISD::BSWAP, VT, Expand);
813 |   }
814 |
815 |   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
816 |   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
817 |
818 |   setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
819 |   setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
820 |
821 |   if (Subtarget->hasMVEIntegerOps())
822 |     addMVEVectorTypes(Subtarget->hasMVEFloatOps());
823 |
824 |   // Combine low-overhead loop intrinsics so that we can lower i1 types.
825 |   if (Subtarget->hasLOB()) {
826 |     setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
827 |   }
828 |
829 |   if (Subtarget->hasNEON()) {
830 |     addDRTypeForNEON(MVT::v2f32);
831 |     addDRTypeForNEON(MVT::v8i8);
832 |     addDRTypeForNEON(MVT::v4i16);
833 |     addDRTypeForNEON(MVT::v2i32);
834 |     addDRTypeForNEON(MVT::v1i64);
835 |
836 |     addQRTypeForNEON(MVT::v4f32);
837 |     addQRTypeForNEON(MVT::v2f64);
838 |     addQRTypeForNEON(MVT::v16i8);
839 |     addQRTypeForNEON(MVT::v8i16);
840 |     addQRTypeForNEON(MVT::v4i32);
841 |     addQRTypeForNEON(MVT::v2i64);
842 |
843 |     if (Subtarget->hasFullFP16()) {
844 |       addQRTypeForNEON(MVT::v8f16);
845 |       addDRTypeForNEON(MVT::v4f16);
846 |     }
847 |
848 |     if (Subtarget->hasBF16()) {
849 |       addQRTypeForNEON(MVT::v8bf16);
850 |       addDRTypeForNEON(MVT::v4bf16);
851 |     }
852 |   }
853 |
854 |   if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
855 |     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
856 |     // none of Neon, MVE or VFP supports any arithmetic operations on it.
857 |     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
858 |     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
859 |     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
860 |     // FIXME: Code duplication: FDIV and FREM are expanded always, see
861 |     // ARMTargetLowering::addTypeForNEON method for details.
862 |     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
863 |     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
864 |     // FIXME: Create unittest.
865 |     // In other words, find a way to detect when "copysign" appears in a DAG
866 |     // with vector operands.
867 |     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
868 |     // FIXME: Code duplication: SETCC has custom operation action, see
869 |     // ARMTargetLowering::addTypeForNEON method for details.
870 |     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
871 |     // FIXME: Create unittest for FNEG and for FABS.
872 |     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
873 |     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
874 |     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
875 |     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
876 |     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
877 |     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
878 |     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
879 |     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
880 |     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
881 |     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
882 |     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
883 |     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
884 |     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
885 |     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
886 |     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
887 |     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
888 |     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
889 |     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
890 |   }
891 |
892 |   if (Subtarget->hasNEON()) {
893 |     // The same applies to v4f32. But keep in mind that vadd, vsub and vmul
894 |     // are natively supported for v4f32.
895 |     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
896 |     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
897 |     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
898 |     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
899 |     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
900 |     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
901 |     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
902 |     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
903 |     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
904 |     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
905 |     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
906 |     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
907 |     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
908 |     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
909 |
910 |     // Mark v2f32 intrinsics.
911 |     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
912 |     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
913 |     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
914 |     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
915 |     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
916 |     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
917 |     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
918 |     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
919 |     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
920 |     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
921 |     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
922 |     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
923 |     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
924 |     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
925 |
926 |     // Neon does not support some operations on v1i64 and v2i64 types.
927 |     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
928 |     // Custom handling for some quad-vector types to detect VMULL.
929 |     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
930 |     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
931 |     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
932 |     // Custom handling for some vector types to avoid expensive expansions
933 |     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
934 |     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
935 |     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
936 |     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
937 |     // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
938 |     // a destination type that is wider than the source, nor does it have
939 |     // a FP_TO_[SU]INT instruction with a narrower destination than
940 |     // source.
941 |     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
942 |     setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
943 |     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
944 |     setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
945 |     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
946 |     setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
947 |     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
948 |     setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
949 |
950 |     setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
951 |     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
952 |
953 |     // NEON does not have single instruction CTPOP for vectors with element
954 |     // types wider than 8 bits. However, custom lowering can leverage the
955 |     // v8i8/v16i8 vcnt instruction.
956 |     setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
957 |     setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
958 |     setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
959 |     setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
960 |     setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
961 |     setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
962 |
963 |     setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
964 |     setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
965 |
966 |     // NEON does not have single instruction CTTZ for vectors.
967 |     setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
968 |     setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
969 |     setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
970 |     setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
971 |
972 |     setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
973 |     setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
974 |     setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
975 |     setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
976 |
977 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
978 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
979 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
980 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
981 |
982 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
983 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
984 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
985 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
986 |
987 |     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
988 |       setOperationAction(ISD::MULHS, VT, Expand);
989 |       setOperationAction(ISD::MULHU, VT, Expand);
990 |     }
991 |
992 |     // NEON only has FMA instructions as of VFP4.
993 |     if (!Subtarget->hasVFP4Base()) {
994 |       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
995 |       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
996 |     }
997 |
998 |     setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
999 |                          ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
1000 |
1001 |     // It is legal to extload from v4i8 to v4i16 or v4i32.
1002 |     for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1003 |                    MVT::v2i32}) {
1004 |       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1005 |         setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
1006 |         setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
1007 |         setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
1008 |       }
1009 |     }
1010 |   }
1011 |
1012 |   if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1013 |     setTargetDAGCombine(
1014 |         {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
1015 |          ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1016 |          ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
1017 |          ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
1018 |          ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
1019 |   }
1020 |   if (Subtarget->hasMVEIntegerOps()) {
1021 |     setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
1022 |                          ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
1023 |                          ISD::SETCC});
1024 |   }
1025 |   if (Subtarget->hasMVEFloatOps()) {
1026 |     setTargetDAGCombine(ISD::FADD);
1027 |   }
1028 |
1029 |   if (!Subtarget->hasFP64()) {
1030 |     // When targeting a floating-point unit with only single-precision
1031 |     // operations, f64 is legal for the few double-precision instructions which
1032 |     // are present. However, no double-precision operations other than moves,
1033 |     // loads and stores are provided by the hardware.
1034 | setOperationAction(ISD::FADD, MVT::f64, Expand); | |||
1035 | setOperationAction(ISD::FSUB, MVT::f64, Expand); | |||
1036 | setOperationAction(ISD::FMUL, MVT::f64, Expand); | |||
1037 | setOperationAction(ISD::FMA, MVT::f64, Expand); | |||
1038 | setOperationAction(ISD::FDIV, MVT::f64, Expand); | |||
1039 | setOperationAction(ISD::FREM, MVT::f64, Expand); | |||
1040 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); | |||
1041 | setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); | |||
1042 | setOperationAction(ISD::FNEG, MVT::f64, Expand); | |||
1043 | setOperationAction(ISD::FABS, MVT::f64, Expand); | |||
1044 | setOperationAction(ISD::FSQRT, MVT::f64, Expand); | |||
1045 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | |||
1046 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | |||
1047 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | |||
1048 | setOperationAction(ISD::FLOG, MVT::f64, Expand); | |||
1049 | setOperationAction(ISD::FLOG2, MVT::f64, Expand); | |||
1050 | setOperationAction(ISD::FLOG10, MVT::f64, Expand); | |||
1051 | setOperationAction(ISD::FEXP, MVT::f64, Expand); | |||
1052 | setOperationAction(ISD::FEXP2, MVT::f64, Expand); | |||
1053 | setOperationAction(ISD::FCEIL, MVT::f64, Expand); | |||
1054 | setOperationAction(ISD::FTRUNC, MVT::f64, Expand); | |||
1055 | setOperationAction(ISD::FRINT, MVT::f64, Expand); | |||
1056 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); | |||
1057 | setOperationAction(ISD::FFLOOR, MVT::f64, Expand); | |||
1058 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); | |||
1059 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); | |||
1060 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); | |||
1061 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); | |||
1062 | setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); | |||
1063 | setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); | |||
1064 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); | |||
1065 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); | |||
1066 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); | |||
1067 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom); | |||
1068 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom); | |||
1069 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); | |||
1070 | } | |||
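| // A rough illustration of the Expand/Custom split above, assuming a | |||
| // single-precision-only FPU (e.g. an FPv5-SP Cortex-M core): | |||
| //   double d = a + b;    // ISD::FADD f64, Expand -> __aeabi_dadd libcall | |||
| //   int i = (int)d;      // ISD::FP_TO_SINT, Custom -> target sequence | |||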
1071 | ||||
1072 | if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { | |||
1073 | setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); | |||
1074 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); | |||
1075 | if (Subtarget->hasFullFP16()) { | |||
1076 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); | |||
1077 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); | |||
1078 | } | |||
1079 | } | |||
1080 | ||||
1081 | if (!Subtarget->hasFP16()) { | |||
1082 | setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); | |||
1083 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); | |||
1084 | } | |||
1085 | ||||
1086 | computeRegisterProperties(Subtarget->getRegisterInfo()); | |||
1087 | ||||
1088 | // ARM does not have floating-point extending loads. | |||
1089 | for (MVT VT : MVT::fp_valuetypes()) { | |||
1090 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); | |||
1091 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); | |||
1092 | } | |||
1093 | ||||
1094 | // ... or truncating stores | |||
1095 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | |||
1096 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | |||
1097 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | |||
1098 | ||||
1099 | // ARM does not have an i1 sign-extending load. | |||
1100 | for (MVT VT : MVT::integer_valuetypes()) | |||
1101 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | |||
1102 | ||||
1103 | // ARM supports all 4 flavors of integer indexed load / store. | |||
1104 | if (!Subtarget->isThumb1Only()) { | |||
1105 | for (unsigned im = (unsigned)ISD::PRE_INC; | |||
1106 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { | |||
1107 | setIndexedLoadAction(im, MVT::i1, Legal); | |||
1108 | setIndexedLoadAction(im, MVT::i8, Legal); | |||
1109 | setIndexedLoadAction(im, MVT::i16, Legal); | |||
1110 | setIndexedLoadAction(im, MVT::i32, Legal); | |||
1111 | setIndexedStoreAction(im, MVT::i1, Legal); | |||
1112 | setIndexedStoreAction(im, MVT::i8, Legal); | |||
1113 | setIndexedStoreAction(im, MVT::i16, Legal); | |||
1114 | setIndexedStoreAction(im, MVT::i32, Legal); | |||
1115 | } | |||
1116 | } else { | |||
1117 | // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. | |||
1118 | setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); | |||
1119 | setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); | |||
1120 | } | |||
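| // For reference, the four indexed flavors map onto ARM addressing modes | |||
| // roughly as follows (illustrative assembly): | |||
| //   pre-inc:  ldr r0, [r1, #4]!     post-inc:  ldr r0, [r1], #4 | |||
| //   pre-dec:  ldr r0, [r1, #-4]!    post-dec:  ldr r0, [r1], #-4 | |||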
1121 | ||||
1122 | setOperationAction(ISD::SADDO, MVT::i32, Custom); | |||
1123 | setOperationAction(ISD::UADDO, MVT::i32, Custom); | |||
1124 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); | |||
1125 | setOperationAction(ISD::USUBO, MVT::i32, Custom); | |||
1126 | ||||
1127 | setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); | |||
1128 | setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); | |||
1129 | if (Subtarget->hasDSP()) { | |||
1130 | setOperationAction(ISD::SADDSAT, MVT::i8, Custom); | |||
1131 | setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); | |||
1132 | setOperationAction(ISD::SADDSAT, MVT::i16, Custom); | |||
1133 | setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); | |||
1134 | setOperationAction(ISD::UADDSAT, MVT::i8, Custom); | |||
1135 | setOperationAction(ISD::USUBSAT, MVT::i8, Custom); | |||
1136 | setOperationAction(ISD::UADDSAT, MVT::i16, Custom); | |||
1137 | setOperationAction(ISD::USUBSAT, MVT::i16, Custom); | |||
1138 | } | |||
1139 | if (Subtarget->hasBaseDSP()) { | |||
1140 | setOperationAction(ISD::SADDSAT, MVT::i32, Legal); | |||
1141 | setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); | |||
1142 | } | |||
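| // On cores with base DSP the i32 saturating forms map directly onto the | |||
| // QADD/QSUB instructions, e.g. sadd_sat(a, b) -> qadd r0, r0, r1 | |||
| // (illustrative), which is why they are Legal rather than Custom. | |||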
1143 | ||||
1144 | // i64 operation support. | |||
1145 | setOperationAction(ISD::MUL, MVT::i64, Expand); | |||
1146 | setOperationAction(ISD::MULHU, MVT::i32, Expand); | |||
1147 | if (Subtarget->isThumb1Only()) { | |||
1148 | setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); | |||
1149 | setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); | |||
1150 | } | |||
1151 | if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() | |||
1152 | || (Subtarget->isThumb2() && !Subtarget->hasDSP())) | |||
1153 | setOperationAction(ISD::MULHS, MVT::i32, Expand); | |||
1154 | ||||
1155 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); | |||
1156 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); | |||
1157 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); | |||
1158 | setOperationAction(ISD::SRL, MVT::i64, Custom); | |||
1159 | setOperationAction(ISD::SRA, MVT::i64, Custom); | |||
1160 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); | |||
1161 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); | |||
1162 | setOperationAction(ISD::LOAD, MVT::i64, Custom); | |||
1163 | setOperationAction(ISD::STORE, MVT::i64, Custom); | |||
1164 | ||||
1165 | // MVE lowers 64-bit shifts to lsll and lsrl, | |||
1166 | // assuming that ISD::SRL and ISD::SRA of i64 are already marked Custom above. | |||
1167 | if (Subtarget->hasMVEIntegerOps()) | |||
1168 | setOperationAction(ISD::SHL, MVT::i64, Custom); | |||
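| // Roughly, for an i64 value split across a GPR pair (illustrative): | |||
| //   uint64_t x = y << n;  ->  lsll r0, r1, r2   ; r1:r0 shifted left by r2 | |||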
1169 | ||||
1170 | // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. | |||
1171 | if (Subtarget->isThumb1Only()) { | |||
1172 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); | |||
1173 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); | |||
1174 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); | |||
1175 | } | |||
1176 | ||||
1177 | if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) | |||
1178 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); | |||
1179 | ||||
1180 | // ARM does not have ROTL. | |||
1181 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | |||
1182 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | |||
1183 | setOperationAction(ISD::ROTL, VT, Expand); | |||
1184 | setOperationAction(ISD::ROTR, VT, Expand); | |||
1185 | } | |||
1186 | setOperationAction(ISD::CTTZ, MVT::i32, Custom); | |||
1187 | setOperationAction(ISD::CTPOP, MVT::i32, Expand); | |||
1188 | if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { | |||
1189 | setOperationAction(ISD::CTLZ, MVT::i32, Expand); | |||
1190 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); | |||
1191 | } | |||
1192 | ||||
1193 | // @llvm.readcyclecounter requires the Performance Monitors extension. | |||
1194 | // Default to the 0 expansion on unsupported platforms. | |||
1195 | // FIXME: Technically there are older ARM CPUs that have | |||
1196 | // implementation-specific ways of obtaining this information. | |||
1197 | if (Subtarget->hasPerfMon()) | |||
1198 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); | |||
1199 | ||||
1200 | // Only ARMv6 and above have BSWAP (REV). | |||
1201 | if (!Subtarget->hasV6Ops()) | |||
1202 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | |||
1203 | ||||
1204 | bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() | |||
1205 | : Subtarget->hasDivideInARMMode(); | |||
1206 | if (!hasDivide) { | |||
1207 | // These are expanded into libcalls if the CPU doesn't have a HW divider. | |||
1208 | setOperationAction(ISD::SDIV, MVT::i32, LibCall); | |||
1209 | setOperationAction(ISD::UDIV, MVT::i32, LibCall); | |||
1210 | } | |||
1211 | ||||
1212 | if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { | |||
1213 | setOperationAction(ISD::SDIV, MVT::i32, Custom); | |||
1214 | setOperationAction(ISD::UDIV, MVT::i32, Custom); | |||
1215 | ||||
1216 | setOperationAction(ISD::SDIV, MVT::i64, Custom); | |||
1217 | setOperationAction(ISD::UDIV, MVT::i64, Custom); | |||
1218 | } | |||
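| // On Windows the Custom lowering is also responsible for the required | |||
| // divide-by-zero check (see the ARMISD::WIN__DBZCHK node later in this | |||
| // file) before the __rt_*div helpers are called. | |||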
1219 | ||||
1220 | setOperationAction(ISD::SREM, MVT::i32, Expand); | |||
1221 | setOperationAction(ISD::UREM, MVT::i32, Expand); | |||
1222 | ||||
1223 | // Register-based DivRem for AEABI (RTABI 4.2) | |||
1224 | if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || | |||
1225 | Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || | |||
1226 | Subtarget->isTargetWindows()) { | |||
1227 | setOperationAction(ISD::SREM, MVT::i64, Custom); | |||
1228 | setOperationAction(ISD::UREM, MVT::i64, Custom); | |||
1229 | HasStandaloneRem = false; | |||
1230 | ||||
1231 | if (Subtarget->isTargetWindows()) { | |||
1232 | const struct { | |||
1233 | const RTLIB::Libcall Op; | |||
1234 | const char * const Name; | |||
1235 | const CallingConv::ID CC; | |||
1236 | } LibraryCalls[] = { | |||
1237 | { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, | |||
1238 | { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, | |||
1239 | { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, | |||
1240 | { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, | |||
1241 | ||||
1242 | { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, | |||
1243 | { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, | |||
1244 | { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, | |||
1245 | { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, | |||
1246 | }; | |||
1247 | ||||
1248 | for (const auto &LC : LibraryCalls) { | |||
1249 | setLibcallName(LC.Op, LC.Name); | |||
1250 | setLibcallCallingConv(LC.Op, LC.CC); | |||
1251 | } | |||
1252 | } else { | |||
1253 | const struct { | |||
1254 | const RTLIB::Libcall Op; | |||
1255 | const char * const Name; | |||
1256 | const CallingConv::ID CC; | |||
1257 | } LibraryCalls[] = { | |||
1258 | { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | |||
1259 | { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | |||
1260 | { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | |||
1261 | { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, | |||
1262 | ||||
1263 | { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | |||
1264 | { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | |||
1265 | { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | |||
1266 | { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, | |||
1267 | }; | |||
1268 | ||||
1269 | for (const auto &LC : LibraryCalls) { | |||
1270 | setLibcallName(LC.Op, LC.Name); | |||
1271 | setLibcallCallingConv(LC.Op, LC.CC); | |||
1272 | } | |||
1273 | } | |||
1274 | ||||
1275 | setOperationAction(ISD::SDIVREM, MVT::i32, Custom); | |||
1276 | setOperationAction(ISD::UDIVREM, MVT::i32, Custom); | |||
1277 | setOperationAction(ISD::SDIVREM, MVT::i64, Custom); | |||
1278 | setOperationAction(ISD::UDIVREM, MVT::i64, Custom); | |||
1279 | } else { | |||
1280 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); | |||
1281 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); | |||
1282 | } | |||
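| // The AEABI helpers return both results from one call -- quotient in r0, | |||
| // remainder in r1 -- so a div and a rem of the same operands can share a | |||
| // single __aeabi_idivmod call; that is what the Custom DIVREM lowering | |||
| // above exploits. | |||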
1283 | ||||
1284 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { | |||
1285 | // MSVCRT doesn't have powi; fall back to pow | |||
1286 | setLibcallName(RTLIB::POWI_F32, nullptr); | |||
1287 | setLibcallName(RTLIB::POWI_F64, nullptr); | |||
1288 | } | |||
1289 | ||||
1290 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | |||
1291 | setOperationAction(ISD::ConstantPool, MVT::i32, Custom); | |||
1292 | setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); | |||
1293 | setOperationAction(ISD::BlockAddress, MVT::i32, Custom); | |||
1294 | ||||
1295 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | |||
1296 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); | |||
1297 | ||||
1298 | // Use the default implementation. | |||
1299 | setOperationAction(ISD::VASTART, MVT::Other, Custom); | |||
1300 | setOperationAction(ISD::VAARG, MVT::Other, Expand); | |||
1301 | setOperationAction(ISD::VACOPY, MVT::Other, Expand); | |||
1302 | setOperationAction(ISD::VAEND, MVT::Other, Expand); | |||
1303 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); | |||
1304 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); | |||
1305 | ||||
1306 | if (Subtarget->isTargetWindows()) | |||
1307 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); | |||
1308 | else | |||
1309 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); | |||
1310 | ||||
1311 | // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use | |||
1312 | // the default expansion. | |||
1313 | InsertFencesForAtomic = false; | |||
1314 | if (Subtarget->hasAnyDataBarrier() && | |||
1315 | (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { | |||
1316 | // ATOMIC_FENCE needs custom lowering; the others should have been expanded | |||
1317 | // to ldrex/strex loops already. | |||
1318 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); | |||
1319 | if (!Subtarget->isThumb() || !Subtarget->isMClass()) | |||
1320 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); | |||
1321 | ||||
1322 | // On v8, we have particularly efficient implementations of atomic fences | |||
1323 | // if they can be combined with nearby atomic loads and stores. | |||
1324 | if (!Subtarget->hasAcquireRelease() || | |||
1325 | getTargetMachine().getOptLevel() == 0) { | |||
1326 | // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. | |||
1327 | InsertFencesForAtomic = true; | |||
1328 | } | |||
1329 | } else { | |||
1330 | // If there's anything we can use as a barrier, go through custom lowering | |||
1331 | // for ATOMIC_FENCE. | |||
1332 | // If the target has DMB in Thumb mode, fences can be inserted. | |||
1333 | if (Subtarget->hasDataBarrier()) | |||
1334 | InsertFencesForAtomic = true; | |||
1335 | ||||
1336 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, | |||
1337 | Subtarget->hasAnyDataBarrier() ? Custom : Expand); | |||
1338 | ||||
1339 | // Set them all for expansion, which will force libcalls. | |||
1340 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); | |||
1341 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); | |||
1342 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); | |||
1343 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); | |||
1344 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); | |||
1345 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); | |||
1346 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); | |||
1347 | setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); | |||
1348 | setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); | |||
1349 | setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); | |||
1350 | setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); | |||
1351 | setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); | |||
1352 | // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the | |||
1353 | // Unordered/Monotonic case. | |||
1354 | if (!InsertFencesForAtomic) { | |||
1355 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); | |||
1356 | setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); | |||
1357 | } | |||
1358 | } | |||
1359 | ||||
1360 | // Compute supported atomic widths. | |||
1361 | if (Subtarget->isTargetLinux() || | |||
1362 | (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { | |||
1363 | // For targets where __sync_* routines are reliably available, we use them | |||
1364 | // if necessary. | |||
1365 | // | |||
1366 | // ARM Linux always supports 64-bit atomics through kernel-assisted atomic | |||
1367 | // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? | |||
1368 | // | |||
1369 | // ARMv6 targets have native instructions in ARM mode. For Thumb mode, | |||
1370 | // such targets should provide __sync_* routines, which use the ARM mode | |||
1371 | // instructions. (ARMv6 doesn't have dmb, but it has an equivalent | |||
1372 | // encoding; see ARMISD::MEMBARRIER_MCR.) | |||
1373 | setMaxAtomicSizeInBitsSupported(64); | |||
1374 | } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) || | |||
1375 | Subtarget->hasForced32BitAtomics()) { | |||
1376 | // Cortex-M cores (besides Cortex-M0) have 32-bit atomics. | |||
1377 | setMaxAtomicSizeInBitsSupported(32); | |||
1378 | } else { | |||
1379 | // We can't assume anything about other targets; just use libatomic | |||
1380 | // routines. | |||
1381 | setMaxAtomicSizeInBitsSupported(0); | |||
1382 | } | |||
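| // Roughly, the three tiers above mean (illustrative): | |||
| //   64: an i64 cmpxchg becomes an ldrexd/strexd loop, or a kernel-assisted | |||
| //       __sync_* call on ARM Linux; | |||
| //   32: i64 atomics fall back to libatomic (__atomic_* calls); | |||
| //   0:  all atomic operations are lowered to __atomic_* libcalls. | |||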
1383 | ||||
1384 | setMaxDivRemBitWidthSupported(64); | |||
1385 | ||||
1386 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); | |||
1387 | ||||
1388 | // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. | |||
1389 | if (!Subtarget->hasV6Ops()) { | |||
1390 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); | |||
1391 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); | |||
1392 | } | |||
1393 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | |||
1394 | ||||
1395 | if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() && | |||
1396 | !Subtarget->isThumb1Only()) { | |||
1397 | // Turn f64 -> i64 into VMOVRRD, and i64 -> f64 into VMOVDRR, | |||
1398 | // iff the target supports VFP2. | |||
1399 | setOperationAction(ISD::BITCAST, MVT::i64, Custom); | |||
1400 | setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); | |||
1401 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); | |||
1402 | } | |||
1403 | ||||
1404 | // We want to custom lower some of our intrinsics. | |||
1405 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | |||
1406 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); | |||
1407 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); | |||
1408 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); | |||
1409 | if (Subtarget->useSjLjEH()) | |||
1410 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); | |||
1411 | ||||
1412 | setOperationAction(ISD::SETCC, MVT::i32, Expand); | |||
1413 | setOperationAction(ISD::SETCC, MVT::f32, Expand); | |||
1414 | setOperationAction(ISD::SETCC, MVT::f64, Expand); | |||
1415 | setOperationAction(ISD::SELECT, MVT::i32, Custom); | |||
1416 | setOperationAction(ISD::SELECT, MVT::f32, Custom); | |||
1417 | setOperationAction(ISD::SELECT, MVT::f64, Custom); | |||
1418 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); | |||
1419 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); | |||
1420 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); | |||
1421 | if (Subtarget->hasFullFP16()) { | |||
1422 | setOperationAction(ISD::SETCC, MVT::f16, Expand); | |||
1423 | setOperationAction(ISD::SELECT, MVT::f16, Custom); | |||
1424 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); | |||
1425 | } | |||
1426 | ||||
1427 | setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); | |||
1428 | ||||
1429 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); | |||
1430 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); | |||
1431 | if (Subtarget->hasFullFP16()) | |||
1432 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); | |||
1433 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); | |||
1434 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); | |||
1435 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); | |||
1436 | ||||
1437 | // We don't support sin/cos/fmod/copysign/pow | |||
1438 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | |||
1439 | setOperationAction(ISD::FSIN, MVT::f32, Expand); | |||
1440 | setOperationAction(ISD::FCOS, MVT::f32, Expand); | |||
1441 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | |||
1442 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); | |||
1443 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); | |||
1444 | setOperationAction(ISD::FREM, MVT::f64, Expand); | |||
1445 | setOperationAction(ISD::FREM, MVT::f32, Expand); | |||
1446 | if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && | |||
1447 | !Subtarget->isThumb1Only()) { | |||
1448 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); | |||
1449 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); | |||
1450 | } | |||
1451 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | |||
1452 | setOperationAction(ISD::FPOW, MVT::f32, Expand); | |||
1453 | ||||
1454 | if (!Subtarget->hasVFP4Base()) { | |||
1455 | setOperationAction(ISD::FMA, MVT::f64, Expand); | |||
1456 | setOperationAction(ISD::FMA, MVT::f32, Expand); | |||
1457 | } | |||
1458 | ||||
1459 | // Various VFP goodness | |||
1460 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { | |||
1461 | // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. | |||
1462 | if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { | |||
1463 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); | |||
1464 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); | |||
1465 | } | |||
1466 | ||||
1467 | // fp16 is a special v7 extension that adds f16 <-> f32 conversions. | |||
1468 | if (!Subtarget->hasFP16()) { | |||
1469 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); | |||
1470 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); | |||
1471 | } | |||
1472 | ||||
1473 | // Strict floating-point comparisons need custom lowering. | |||
1474 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); | |||
1475 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); | |||
1476 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); | |||
1477 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); | |||
1478 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); | |||
1479 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); | |||
1480 | } | |||
1481 | ||||
1482 | // Use __sincos_stret if available. | |||
1483 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && | |||
1484 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { | |||
1485 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); | |||
1486 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); | |||
1487 | } | |||
1488 | ||||
1489 | // FP-ARMv8 implements a lot of rounding-like FP operations. | |||
1490 | if (Subtarget->hasFPARMv8Base()) { | |||
1491 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); | |||
1492 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); | |||
1493 | setOperationAction(ISD::FROUND, MVT::f32, Legal); | |||
1494 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); | |||
1495 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); | |||
1496 | setOperationAction(ISD::FRINT, MVT::f32, Legal); | |||
1497 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); | |||
1498 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); | |||
1499 | if (Subtarget->hasNEON()) { | |||
1500 | setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); | |||
1501 | setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); | |||
1502 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); | |||
1503 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); | |||
1504 | } | |||
1505 | ||||
1506 | if (Subtarget->hasFP64()) { | |||
1507 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); | |||
1508 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); | |||
1509 | setOperationAction(ISD::FROUND, MVT::f64, Legal); | |||
1510 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); | |||
1511 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); | |||
1512 | setOperationAction(ISD::FRINT, MVT::f64, Legal); | |||
1513 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); | |||
1514 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); | |||
1515 | } | |||
1516 | } | |||
1517 | ||||
1518 | // FP16 operations often need to be promoted to f32 to call library functions. | |||
1519 | if (Subtarget->hasFullFP16()) { | |||
1520 | setOperationAction(ISD::FREM, MVT::f16, Promote); | |||
1521 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); | |||
1522 | setOperationAction(ISD::FSIN, MVT::f16, Promote); | |||
1523 | setOperationAction(ISD::FCOS, MVT::f16, Promote); | |||
1524 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); | |||
1525 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); | |||
1526 | setOperationAction(ISD::FPOW, MVT::f16, Promote); | |||
1527 | setOperationAction(ISD::FEXP, MVT::f16, Promote); | |||
1528 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); | |||
1529 | setOperationAction(ISD::FLOG, MVT::f16, Promote); | |||
1530 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); | |||
1531 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); | |||
1532 | ||||
1533 | setOperationAction(ISD::FROUND, MVT::f16, Legal); | |||
1534 | } | |||
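| // Promote widens the f16 operand to f32 first, so e.g. an f16 FSIN becomes | |||
| // (roughly) a convert to f32, a call to sinf, and a round back to f16. | |||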
1535 | ||||
1536 | if (Subtarget->hasNEON()) { | |||
1537 | // vmin and vmax aren't available in a scalar form, so we can use | |||
1538 | // a NEON instruction with an undef lane instead. This has a performance | |||
1539 | // penalty on some cores, so we don't do this unless we have been | |||
1540 | // asked to by the core tuning model. | |||
1541 | if (Subtarget->useNEONForSinglePrecisionFP()) { | |||
1542 | setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); | |||
1543 | setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); | |||
1544 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); | |||
1545 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); | |||
1546 | } | |||
1547 | setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); | |||
1548 | setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); | |||
1549 | setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); | |||
1550 | setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); | |||
1551 | ||||
1552 | if (Subtarget->hasFullFP16()) { | |||
1553 | setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); | |||
1554 | setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); | |||
1555 | setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); | |||
1556 | setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); | |||
1557 | ||||
1558 | setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); | |||
1559 | setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); | |||
1560 | setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); | |||
1561 | setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); | |||
1562 | } | |||
1563 | } | |||
1564 | ||||
1565 | // We have target-specific dag combine patterns for the following nodes: | |||
1566 | // ARMISD::VMOVRRD - No need to call setTargetDAGCombine | |||
1567 | setTargetDAGCombine( | |||
1568 | {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); | |||
1569 | ||||
1570 | if (Subtarget->hasMVEIntegerOps()) | |||
1571 | setTargetDAGCombine(ISD::VSELECT); | |||
1572 | ||||
1573 | if (Subtarget->hasV6Ops()) | |||
1574 | setTargetDAGCombine(ISD::SRL); | |||
1575 | if (Subtarget->isThumb1Only()) | |||
1576 | setTargetDAGCombine(ISD::SHL); | |||
1577 | // Attempt to lower smin/smax to ssat/usat | |||
1578 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || | |||
1579 | Subtarget->isThumb2()) { | |||
1580 | setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); | |||
1581 | } | |||
1582 | ||||
1583 | setStackPointerRegisterToSaveRestore(ARM::SP); | |||
1584 | ||||
1585 | if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || | |||
1586 | !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) | |||
1587 | setSchedulingPreference(Sched::RegPressure); | |||
1588 | else | |||
1589 | setSchedulingPreference(Sched::Hybrid); | |||
1590 | ||||
1591 | //// temporary - rewrite interface to use type | |||
1592 | MaxStoresPerMemset = 8; | |||
1593 | MaxStoresPerMemsetOptSize = 4; | |||
1594 | MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores | |||
1595 | MaxStoresPerMemcpyOptSize = 2; | |||
1596 | MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores | |||
1597 | MaxStoresPerMemmoveOptSize = 2; | |||
1598 | ||||
1599 | // On ARM arguments smaller than 4 bytes are extended, so all arguments | |||
1600 | // are at least 4 bytes aligned. | |||
1601 | setMinStackArgumentAlignment(Align(4)); | |||
1602 | ||||
1603 | // Prefer likely predicted branches to selects on out-of-order cores. | |||
1604 | PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); | |||
1605 | ||||
1606 | setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); | |||
1607 | ||||
1608 | setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); | |||
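| // Thumb instructions are 2-byte aligned while ARM instructions are 4-byte | |||
| // aligned, hence the Align(2)/Align(4) split above. | |||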
1609 | ||||
1610 | if (Subtarget->isThumb() || Subtarget->isThumb2()) | |||
1611 | setTargetDAGCombine(ISD::ABS); | |||
1612 | } | |||
1613 | ||||
1614 | bool ARMTargetLowering::useSoftFloat() const { | |||
1615 | return Subtarget->useSoftFloat(); | |||
1616 | } | |||
1617 | ||||
1618 | // FIXME: It might make sense to define the representative register class as the | |||
1619 | // nearest super-register that has a non-null superset. For example, DPR_VFP2 is | |||
1620 | // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, | |||
1621 | // SPR's representative would be DPR_VFP2. This should work well if register | |||
1622 | // pressure tracking were modified such that a register use would increment the | |||
1623 | // pressure of the register class's representative and all of its super | |||
1624 | // classes' representatives transitively. We have not implemented this because | |||
1625 | // of the difficulty prior to coalescing of modeling operand register classes | |||
1626 | // due to the common occurrence of cross class copies and subregister insertions | |||
1627 | // and extractions. | |||
1628 | std::pair<const TargetRegisterClass *, uint8_t> | |||
1629 | ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, | |||
1630 | MVT VT) const { | |||
1631 | const TargetRegisterClass *RRC = nullptr; | |||
1632 | uint8_t Cost = 1; | |||
1633 | switch (VT.SimpleTy) { | |||
1634 | default: | |||
1635 | return TargetLowering::findRepresentativeClass(TRI, VT); | |||
1636 | // Use DPR as representative register class for all floating point | |||
1637 | // and vector types. Since there are 32 SPR registers and 32 DPR registers, | |||
1638 | // the cost is 1 for both f32 and f64. | |||
1639 | case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: | |||
1640 | case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: | |||
1641 | RRC = &ARM::DPRRegClass; | |||
1642 | // When NEON is used for SP, only half of the register file is available | |||
1643 | // because operations that define both SP and DP results will be constrained | |||
1644 | // to the VFP2 class (D0-D15). We currently model this constraint prior to | |||
1645 | // coalescing by double-counting the SP regs. See the FIXME above. | |||
1646 | if (Subtarget->useNEONForSinglePrecisionFP()) | |||
1647 | Cost = 2; | |||
1648 | break; | |||
1649 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: | |||
1650 | case MVT::v4f32: case MVT::v2f64: | |||
1651 | RRC = &ARM::DPRRegClass; | |||
1652 | Cost = 2; | |||
1653 | break; | |||
1654 | case MVT::v4i64: | |||
1655 | RRC = &ARM::DPRRegClass; | |||
1656 | Cost = 4; | |||
1657 | break; | |||
1658 | case MVT::v8i64: | |||
1659 | RRC = &ARM::DPRRegClass; | |||
1660 | Cost = 8; | |||
1661 | break; | |||
1662 | } | |||
1663 | return std::make_pair(RRC, Cost); | |||
1664 | } | |||
1665 | ||||
1666 | const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { | |||
1667 | #define MAKE_CASE(V) \ | |||
1668 | case V: \ | |||
1669 | return #V; | |||
1670 | switch ((ARMISD::NodeType)Opcode) { | |||
1671 | case ARMISD::FIRST_NUMBER: | |||
1672 | break; | |||
1673 | MAKE_CASE(ARMISD::Wrapper) | |||
1674 | MAKE_CASE(ARMISD::WrapperPIC) | |||
1675 | MAKE_CASE(ARMISD::WrapperJT) | |||
1676 | MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) | |||
1677 | MAKE_CASE(ARMISD::CALL) | |||
1678 | MAKE_CASE(ARMISD::CALL_PRED) | |||
1679 | MAKE_CASE(ARMISD::CALL_NOLINK) | |||
1680 | MAKE_CASE(ARMISD::tSECALL) | |||
1681 | MAKE_CASE(ARMISD::t2CALL_BTI) | |||
1682 | MAKE_CASE(ARMISD::BRCOND) | |||
1683 | MAKE_CASE(ARMISD::BR_JT) | |||
1684 | MAKE_CASE(ARMISD::BR2_JT) | |||
1685 | MAKE_CASE(ARMISD::RET_FLAG) | |||
1686 | MAKE_CASE(ARMISD::SERET_FLAG) | |||
1687 | MAKE_CASE(ARMISD::INTRET_FLAG) | |||
1688 | MAKE_CASE(ARMISD::PIC_ADD) | |||
1689 | MAKE_CASE(ARMISD::CMP) | |||
1690 | MAKE_CASE(ARMISD::CMN) | |||
1691 | MAKE_CASE(ARMISD::CMPZ) | |||
1692 | MAKE_CASE(ARMISD::CMPFP) | |||
1693 | MAKE_CASE(ARMISD::CMPFPE) | |||
1694 | MAKE_CASE(ARMISD::CMPFPw0) | |||
1695 | MAKE_CASE(ARMISD::CMPFPEw0) | |||
1696 | MAKE_CASE(ARMISD::BCC_i64) | |||
1697 | MAKE_CASE(ARMISD::FMSTAT) | |||
1698 | MAKE_CASE(ARMISD::CMOV) | |||
1699 | MAKE_CASE(ARMISD::SUBS) | |||
1700 | MAKE_CASE(ARMISD::SSAT) | |||
1701 | MAKE_CASE(ARMISD::USAT) | |||
1702 | MAKE_CASE(ARMISD::ASRL) | |||
1703 | MAKE_CASE(ARMISD::LSRL) | |||
1704 | MAKE_CASE(ARMISD::LSLL) | |||
1705 | MAKE_CASE(ARMISD::SRL_FLAG) | |||
1706 | MAKE_CASE(ARMISD::SRA_FLAG) | |||
1707 | MAKE_CASE(ARMISD::RRX) | |||
1708 | MAKE_CASE(ARMISD::ADDC) | |||
1709 | MAKE_CASE(ARMISD::ADDE) | |||
1710 | MAKE_CASE(ARMISD::SUBC) | |||
1711 | MAKE_CASE(ARMISD::SUBE) | |||
1712 | MAKE_CASE(ARMISD::LSLS) | |||
1713 | MAKE_CASE(ARMISD::VMOVRRD) | |||
1714 | MAKE_CASE(ARMISD::VMOVDRR) | |||
1715 | MAKE_CASE(ARMISD::VMOVhr) | |||
1716 | MAKE_CASE(ARMISD::VMOVrh) | |||
1717 | MAKE_CASE(ARMISD::VMOVSR) | |||
1718 | MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) | |||
1719 | MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) | |||
1720 | MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) | |||
1721 | MAKE_CASE(ARMISD::TC_RETURN) | |||
1722 | MAKE_CASE(ARMISD::THREAD_POINTER) | |||
1723 | MAKE_CASE(ARMISD::DYN_ALLOC) | |||
1724 | MAKE_CASE(ARMISD::MEMBARRIER_MCR) | |||
1725 | MAKE_CASE(ARMISD::PRELOAD) | |||
1726 | MAKE_CASE(ARMISD::LDRD) | |||
1727 | MAKE_CASE(ARMISD::STRD) | |||
1728 | MAKE_CASE(ARMISD::WIN__CHKSTK) | |||
1729 | MAKE_CASE(ARMISD::WIN__DBZCHK) | |||
1730 | MAKE_CASE(ARMISD::PREDICATE_CAST) | |||
1731 | MAKE_CASE(ARMISD::VECTOR_REG_CAST) | |||
1732 | MAKE_CASE(ARMISD::MVESEXT) | |||
1733 | MAKE_CASE(ARMISD::MVEZEXT) | |||
1734 | MAKE_CASE(ARMISD::MVETRUNC) | |||
1735 | MAKE_CASE(ARMISD::VCMP) | |||
1736 | MAKE_CASE(ARMISD::VCMPZ) | |||
1737 | MAKE_CASE(ARMISD::VTST) | |||
1738 | MAKE_CASE(ARMISD::VSHLs) | |||
1739 | MAKE_CASE(ARMISD::VSHLu) | |||
1740 | MAKE_CASE(ARMISD::VSHLIMM) | |||
1741 | MAKE_CASE(ARMISD::VSHRsIMM) | |||
1742 | MAKE_CASE(ARMISD::VSHRuIMM) | |||
1743 | MAKE_CASE(ARMISD::VRSHRsIMM) | |||
1744 | MAKE_CASE(ARMISD::VRSHRuIMM) | |||
1745 | MAKE_CASE(ARMISD::VRSHRNIMM) | |||
1746 | MAKE_CASE(ARMISD::VQSHLsIMM) | |||
1747 | MAKE_CASE(ARMISD::VQSHLuIMM) | |||
1748 | MAKE_CASE(ARMISD::VQSHLsuIMM) | |||
1749 | MAKE_CASE(ARMISD::VQSHRNsIMM) | |||
1750 | MAKE_CASE(ARMISD::VQSHRNuIMM) | |||
1751 | MAKE_CASE(ARMISD::VQSHRNsuIMM) | |||
1752 | MAKE_CASE(ARMISD::VQRSHRNsIMM) | |||
1753 | MAKE_CASE(ARMISD::VQRSHRNuIMM) | |||
1754 | MAKE_CASE(ARMISD::VQRSHRNsuIMM) | |||
1755 | MAKE_CASE(ARMISD::VSLIIMM) | |||
1756 | MAKE_CASE(ARMISD::VSRIIMM) | |||
1757 | MAKE_CASE(ARMISD::VGETLANEu) | |||
1758 | MAKE_CASE(ARMISD::VGETLANEs) | |||
1759 | MAKE_CASE(ARMISD::VMOVIMM) | |||
1760 | MAKE_CASE(ARMISD::VMVNIMM) | |||
1761 | MAKE_CASE(ARMISD::VMOVFPIMM) | |||
1762 | MAKE_CASE(ARMISD::VDUP) | |||
1763 | MAKE_CASE(ARMISD::VDUPLANE) | |||
1764 | MAKE_CASE(ARMISD::VEXT) | |||
1765 | MAKE_CASE(ARMISD::VREV64) | |||
1766 | MAKE_CASE(ARMISD::VREV32) | |||
1767 | MAKE_CASE(ARMISD::VREV16) | |||
1768 | MAKE_CASE(ARMISD::VZIP) | |||
1769 | MAKE_CASE(ARMISD::VUZP) | |||
1770 | MAKE_CASE(ARMISD::VTRN) | |||
1771 | MAKE_CASE(ARMISD::VTBL1) | |||
1772 | MAKE_CASE(ARMISD::VTBL2) | |||
1773 | MAKE_CASE(ARMISD::VMOVN) | |||
1774 | MAKE_CASE(ARMISD::VQMOVNs) | |||
1775 | MAKE_CASE(ARMISD::VQMOVNu) | |||
1776 | MAKE_CASE(ARMISD::VCVTN) | |||
1777 | MAKE_CASE(ARMISD::VCVTL) | |||
1778 | MAKE_CASE(ARMISD::VIDUP) | |||
1779 | MAKE_CASE(ARMISD::VMULLs) | |||
1780 | MAKE_CASE(ARMISD::VMULLu) | |||
1781 | MAKE_CASE(ARMISD::VQDMULH) | |||
1782 | MAKE_CASE(ARMISD::VADDVs) | |||
1783 | MAKE_CASE(ARMISD::VADDVu) | |||
1784 | MAKE_CASE(ARMISD::VADDVps) | |||
1785 | MAKE_CASE(ARMISD::VADDVpu) | |||
1786 | MAKE_CASE(ARMISD::VADDLVs) | |||
1787 | MAKE_CASE(ARMISD::VADDLVu) | |||
1788 | MAKE_CASE(ARMISD::VADDLVAs) | |||
1789 | MAKE_CASE(ARMISD::VADDLVAu) | |||
1790 | MAKE_CASE(ARMISD::VADDLVps) | |||
1791 | MAKE_CASE(ARMISD::VADDLVpu) | |||
1792 | MAKE_CASE(ARMISD::VADDLVAps) | |||
1793 | MAKE_CASE(ARMISD::VADDLVApu) | |||
1794 | MAKE_CASE(ARMISD::VMLAVs) | |||
1795 | MAKE_CASE(ARMISD::VMLAVu) | |||
1796 | MAKE_CASE(ARMISD::VMLAVps) | |||
1797 | MAKE_CASE(ARMISD::VMLAVpu) | |||
1798 | MAKE_CASE(ARMISD::VMLALVs) | |||
1799 | MAKE_CASE(ARMISD::VMLALVu) | |||
1800 | MAKE_CASE(ARMISD::VMLALVps) | |||
1801 | MAKE_CASE(ARMISD::VMLALVpu) | |||
1802 | MAKE_CASE(ARMISD::VMLALVAs) | |||
1803 | MAKE_CASE(ARMISD::VMLALVAu) | |||
1804 | MAKE_CASE(ARMISD::VMLALVAps) | |||
1805 | MAKE_CASE(ARMISD::VMLALVApu) | |||
1806 | MAKE_CASE(ARMISD::VMINVu) | |||
1807 | MAKE_CASE(ARMISD::VMINVs) | |||
1808 | MAKE_CASE(ARMISD::VMAXVu) | |||
1809 | MAKE_CASE(ARMISD::VMAXVs) | |||
1810 | MAKE_CASE(ARMISD::UMAAL) | |||
1811 | MAKE_CASE(ARMISD::UMLAL) | |||
1812 | MAKE_CASE(ARMISD::SMLAL) | |||
1813 | MAKE_CASE(ARMISD::SMLALBB) | |||
1814 | MAKE_CASE(ARMISD::SMLALBT) | |||
1815 | MAKE_CASE(ARMISD::SMLALTB) | |||
1816 | MAKE_CASE(ARMISD::SMLALTT) | |||
1817 | MAKE_CASE(ARMISD::SMULWB) | |||
1818 | MAKE_CASE(ARMISD::SMULWT) | |||
1819 | MAKE_CASE(ARMISD::SMLALD) | |||
1820 | MAKE_CASE(ARMISD::SMLALDX) | |||
1821 | MAKE_CASE(ARMISD::SMLSLD) | |||
1822 | MAKE_CASE(ARMISD::SMLSLDX) | |||
1823 | MAKE_CASE(ARMISD::SMMLAR) | |||
1824 | MAKE_CASE(ARMISD::SMMLSR) | |||
1825 | MAKE_CASE(ARMISD::QADD16b) | |||
1826 | MAKE_CASE(ARMISD::QSUB16b) | |||
1827 | MAKE_CASE(ARMISD::QADD8b) | |||
1828 | MAKE_CASE(ARMISD::QSUB8b) | |||
1829 | MAKE_CASE(ARMISD::UQADD16b) | |||
1830 | MAKE_CASE(ARMISD::UQSUB16b) | |||
1831 | MAKE_CASE(ARMISD::UQADD8b) | |||
1832 | MAKE_CASE(ARMISD::UQSUB8b) | |||
1833 | MAKE_CASE(ARMISD::BUILD_VECTOR) | |||
1834 | MAKE_CASE(ARMISD::BFI) | |||
1835 | MAKE_CASE(ARMISD::VORRIMM) | |||
1836 | MAKE_CASE(ARMISD::VBICIMM) | |||
1837 | MAKE_CASE(ARMISD::VBSP) | |||
1838 | MAKE_CASE(ARMISD::MEMCPY) | |||
1839 | MAKE_CASE(ARMISD::VLD1DUP) | |||
1840 | MAKE_CASE(ARMISD::VLD2DUP) | |||
1841 | MAKE_CASE(ARMISD::VLD3DUP) | |||
1842 | MAKE_CASE(ARMISD::VLD4DUP) | |||
1843 | MAKE_CASE(ARMISD::VLD1_UPD) | |||
1844 | MAKE_CASE(ARMISD::VLD2_UPD) | |||
1845 | MAKE_CASE(ARMISD::VLD3_UPD) | |||
1846 | MAKE_CASE(ARMISD::VLD4_UPD) | |||
1847 | MAKE_CASE(ARMISD::VLD1x2_UPD) | |||
1848 | MAKE_CASE(ARMISD::VLD1x3_UPD) | |||
1849 | MAKE_CASE(ARMISD::VLD1x4_UPD) | |||
1850 | MAKE_CASE(ARMISD::VLD2LN_UPD) | |||
1851 | MAKE_CASE(ARMISD::VLD3LN_UPD) | |||
1852 | MAKE_CASE(ARMISD::VLD4LN_UPD) | |||
1853 | MAKE_CASE(ARMISD::VLD1DUP_UPD) | |||
1854 | MAKE_CASE(ARMISD::VLD2DUP_UPD) | |||
1855 | MAKE_CASE(ARMISD::VLD3DUP_UPD) | |||
1856 | MAKE_CASE(ARMISD::VLD4DUP_UPD) | |||
1857 | MAKE_CASE(ARMISD::VST1_UPD) | |||
1858 | MAKE_CASE(ARMISD::VST2_UPD) | |||
1859 | MAKE_CASE(ARMISD::VST3_UPD) | |||
1860 | MAKE_CASE(ARMISD::VST4_UPD) | |||
1861 | MAKE_CASE(ARMISD::VST1x2_UPD) | |||
1862 | MAKE_CASE(ARMISD::VST1x3_UPD) | |||
1863 | MAKE_CASE(ARMISD::VST1x4_UPD) | |||
1864 | MAKE_CASE(ARMISD::VST2LN_UPD) | |||
1865 | MAKE_CASE(ARMISD::VST3LN_UPD) | |||
1866 | MAKE_CASE(ARMISD::VST4LN_UPD) | |||
1867 | MAKE_CASE(ARMISD::WLS) | |||
1868 | MAKE_CASE(ARMISD::WLSSETUP) | |||
1869 | MAKE_CASE(ARMISD::LE) | |||
1870 | MAKE_CASE(ARMISD::LOOP_DEC) | |||
1871 | MAKE_CASE(ARMISD::CSINV) | |||
1872 | MAKE_CASE(ARMISD::CSNEG) | |||
1873 | MAKE_CASE(ARMISD::CSINC) | |||
1874 | MAKE_CASE(ARMISD::MEMCPYLOOP) | |||
1875 | MAKE_CASE(ARMISD::MEMSETLOOP) | |||
1876 | #undef MAKE_CASE | |||
1877 | } | |||
1878 | return nullptr; | |||
1879 | } | |||
1880 | ||||
1881 | EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, | |||
1882 | EVT VT) const { | |||
1883 | if (!VT.isVector()) | |||
1884 | return getPointerTy(DL); | |||
1885 | ||||
1886 | // MVE has a predicate register. | |||
1887 | if ((Subtarget->hasMVEIntegerOps() && | |||
1888 | (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || | |||
1889 | VT == MVT::v16i8)) || | |||
1890 | (Subtarget->hasMVEFloatOps() && | |||
1891 | (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16))) | |||
1892 | return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); | |||
1893 | return VT.changeVectorElementTypeToInteger(); | |||
1894 | } | |||
1895 | ||||
1896 | /// getRegClassFor - Return the register class that should be used for the | |||
1897 | /// specified value type. | |||
1898 | const TargetRegisterClass * | |||
1899 | ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { | |||
1900 | (void)isDivergent; | |||
1901 | // Map v4i64 to QQ registers but do not make the type legal. Similarly map | |||
1902 | // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to | |||
1903 | // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive | |||
1904 | // MVE Q registers. | |||
1905 | if (Subtarget->hasNEON()) { | |||
1906 | if (VT == MVT::v4i64) | |||
1907 | return &ARM::QQPRRegClass; | |||
1908 | if (VT == MVT::v8i64) | |||
1909 | return &ARM::QQQQPRRegClass; | |||
1910 | } | |||
1911 | if (Subtarget->hasMVEIntegerOps()) { | |||
1912 | if (VT == MVT::v4i64) | |||
1913 | return &ARM::MQQPRRegClass; | |||
1914 | if (VT == MVT::v8i64) | |||
1915 | return &ARM::MQQQQPRRegClass; | |||
1916 | } | |||
1917 | return TargetLowering::getRegClassFor(VT); | |||
1918 | } | |||
1919 | ||||
1920 | // memcpy and other memory intrinsics typically try to use LDM/STM if the | |||
1921 | // source/dest is aligned and the copy size is large enough. We therefore want | |||
1922 | // to align such objects passed to memory intrinsics. | |||
1923 | bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, | |||
1924 | Align &PrefAlign) const { | |||
1925 | if (!isa<MemIntrinsic>(CI)) | |||
1926 | return false; | |||
1927 | MinSize = 8; | |||
1928 | // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 | |||
1929 | // cycle faster than 4-byte aligned LDM. | |||
1930 | PrefAlign = | |||
1931 | (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4)); | |||
1932 | return true; | |||
1933 | } | |||
1934 | ||||
1935 | // Create a fast isel object. | |||
1936 | FastISel * | |||
1937 | ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, | |||
1938 | const TargetLibraryInfo *libInfo) const { | |||
1939 | return ARM::createFastISel(funcInfo, libInfo); | |||
1940 | } | |||
1941 | ||||
1942 | Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { | |||
1943 | unsigned NumVals = N->getNumValues(); | |||
1944 | if (!NumVals) | |||
1945 | return Sched::RegPressure; | |||
1946 | ||||
1947 | for (unsigned i = 0; i != NumVals; ++i) { | |||
1948 | EVT VT = N->getValueType(i); | |||
1949 | if (VT == MVT::Glue || VT == MVT::Other) | |||
1950 | continue; | |||
1951 | if (VT.isFloatingPoint() || VT.isVector()) | |||
1952 | return Sched::ILP; | |||
1953 | } | |||
1954 | ||||
1955 | if (!N->isMachineOpcode()) | |||
1956 | return Sched::RegPressure; | |||
1957 | ||||
1958 | // Loads are scheduled for latency even if the instruction itinerary | |||
1959 | // is not available. | |||
1960 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); | |||
1961 | const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); | |||
1962 | ||||
1963 | if (MCID.getNumDefs() == 0) | |||
1964 | return Sched::RegPressure; | |||
1965 | if (!Itins->isEmpty() && | |||
1966 | Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) | |||
1967 | return Sched::ILP; | |||
1968 | ||||
1969 | return Sched::RegPressure; | |||
1970 | } | |||
1971 | ||||
1972 | //===----------------------------------------------------------------------===// | |||
1973 | // Lowering Code | |||
1974 | //===----------------------------------------------------------------------===// | |||
1975 | ||||
1976 | static bool isSRL16(const SDValue &Op) { | |||
1977 | if (Op.getOpcode() != ISD::SRL) | |||
1978 | return false; | |||
1979 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | |||
1980 | return Const->getZExtValue() == 16; | |||
1981 | return false; | |||
1982 | } | |||
1983 | ||||
1984 | static bool isSRA16(const SDValue &Op) { | |||
1985 | if (Op.getOpcode() != ISD::SRA) | |||
1986 | return false; | |||
1987 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | |||
1988 | return Const->getZExtValue() == 16; | |||
1989 | return false; | |||
1990 | } | |||
1991 | ||||
1992 | static bool isSHL16(const SDValue &Op) { | |||
1993 | if (Op.getOpcode() != ISD::SHL) | |||
1994 | return false; | |||
1995 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | |||
1996 | return Const->getZExtValue() == 16; | |||
1997 | return false; | |||
1998 | } | |||
1999 | ||||
2000 | // Check for a signed 16-bit value. We special-case SRA because it simplifies | |||
2001 | // matters when also looking for SRAs that aren't sign-extending a smaller | |||
2002 | // value. Without the check, we'd need to take extra care with the checking | |||
2003 | // order for some operations. | |||
2004 | static bool isS16(const SDValue &Op, SelectionDAG &DAG) { | |||
2005 | if (isSRA16(Op)) | |||
2006 | return isSHL16(Op.getOperand(0)); | |||
2007 | return DAG.ComputeNumSignBits(Op) == 17; | |||
2008 | } | |||
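| // Why 17: an i32 that fits a signed 16-bit range has at least its top 17 | |||
| // bits equal to the sign bit (e.g. 0x00007FFF and 0xFFFF8000 both have | |||
| // exactly 17 known sign bits); the == here also filters out values that | |||
| // are provably narrower still. | |||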
2009 | ||||
2010 | /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC | |||
2011 | static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { | |||
2012 | switch (CC) { | |||
2013 | default: llvm_unreachable("Unknown condition code!"); | |||
2014 | case ISD::SETNE: return ARMCC::NE; | |||
2015 | case ISD::SETEQ: return ARMCC::EQ; | |||
2016 | case ISD::SETGT: return ARMCC::GT; | |||
2017 | case ISD::SETGE: return ARMCC::GE; | |||
2018 | case ISD::SETLT: return ARMCC::LT; | |||
2019 | case ISD::SETLE: return ARMCC::LE; | |||
2020 | case ISD::SETUGT: return ARMCC::HI; | |||
2021 | case ISD::SETUGE: return ARMCC::HS; | |||
2022 | case ISD::SETULT: return ARMCC::LO; | |||
2023 | case ISD::SETULE: return ARMCC::LS; | |||
2024 | } | |||
2025 | } | |||
2026 | ||||
2027 | /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. | |||
2028 | static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, | |||
2029 | ARMCC::CondCodes &CondCode2) { | |||
2030 | CondCode2 = ARMCC::AL; | |||
2031 | switch (CC) { | |||
2032 | default: llvm_unreachable("Unknown FP condition!"); | |||
2033 | case ISD::SETEQ: | |||
2034 | case ISD::SETOEQ: CondCode = ARMCC::EQ; break; | |||
2035 | case ISD::SETGT: | |||
2036 | case ISD::SETOGT: CondCode = ARMCC::GT; break; | |||
2037 | case ISD::SETGE: | |||
2038 | case ISD::SETOGE: CondCode = ARMCC::GE; break; | |||
2039 | case ISD::SETOLT: CondCode = ARMCC::MI; break; | |||
2040 | case ISD::SETOLE: CondCode = ARMCC::LS; break; | |||
2041 | case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; | |||
2042 | case ISD::SETO: CondCode = ARMCC::VC; break; | |||
2043 | case ISD::SETUO: CondCode = ARMCC::VS; break; | |||
2044 | case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; | |||
2045 | case ISD::SETUGT: CondCode = ARMCC::HI; break; | |||
2046 | case ISD::SETUGE: CondCode = ARMCC::PL; break; | |||
2047 | case ISD::SETLT: | |||
2048 | case ISD::SETULT: CondCode = ARMCC::LT; break; | |||
2049 | case ISD::SETLE: | |||
2050 | case ISD::SETULE: CondCode = ARMCC::LE; break; | |||
2051 | case ISD::SETNE: | |||
2052 | case ISD::SETUNE: CondCode = ARMCC::NE; break; | |||
2053 | } | |||
2054 | } | |||
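| // When CondCode2 != AL the predicate cannot be expressed as one ARM | |||
| // condition, so callers emit a second predicated check; e.g. SETUEQ | |||
| // ("unordered or equal") passes if either EQ or VS holds. | |||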
2055 | ||||
2056 | //===----------------------------------------------------------------------===// | |||
2057 | // Calling Convention Implementation | |||
2058 | //===----------------------------------------------------------------------===// | |||
2059 | ||||
2060 | /// getEffectiveCallingConv - Get the effective calling convention, taking into | |||
2061 | /// account the presence of floating-point hardware and calling convention | |||
2062 | /// limitations, such as support for variadic functions. | |||
2063 | CallingConv::ID | |||
2064 | ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, | |||
2065 | bool isVarArg) const { | |||
2066 | switch (CC) { | |||
2067 | default: | |||
2068 | report_fatal_error("Unsupported calling convention"); | |||
2069 | case CallingConv::ARM_AAPCS: | |||
2070 | case CallingConv::ARM_APCS: | |||
2071 | case CallingConv::GHC: | |||
2072 | case CallingConv::CFGuard_Check: | |||
2073 | return CC; | |||
2074 | case CallingConv::PreserveMost: | |||
2075 | return CallingConv::PreserveMost; | |||
2076 | case CallingConv::ARM_AAPCS_VFP: | |||
2077 | case CallingConv::Swift: | |||
2078 | case CallingConv::SwiftTail: | |||
2079 | return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; | |||
2080 | case CallingConv::C: | |||
2081 | case CallingConv::Tail: | |||
2082 | if (!Subtarget->isAAPCS_ABI()) | |||
2083 | return CallingConv::ARM_APCS; | |||
2084 | else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() && | |||
2085 | getTargetMachine().Options.FloatABIType == FloatABI::Hard && | |||
2086 | !isVarArg) | |||
2087 | return CallingConv::ARM_AAPCS_VFP; | |||
2088 | else | |||
2089 | return CallingConv::ARM_AAPCS; | |||
2090 | case CallingConv::Fast: | |||
2091 | case CallingConv::CXX_FAST_TLS: | |||
2092 | if (!Subtarget->isAAPCS_ABI()) { | |||
2093 | if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) | |||
2094 | return CallingConv::Fast; | |||
2095 | return CallingConv::ARM_APCS; | |||
2096 | } else if (Subtarget->hasVFP2Base() && | |||
2097 | !Subtarget->isThumb1Only() && !isVarArg) | |||
2098 | return CallingConv::ARM_AAPCS_VFP; | |||
2099 | else | |||
2100 | return CallingConv::ARM_AAPCS; | |||
2101 | } | |||
2102 | } | |||
2103 | ||||
2104 | CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, | |||
2105 | bool isVarArg) const { | |||
2106 | return CCAssignFnForNode(CC, false, isVarArg); | |||
2107 | } | |||
2108 | ||||
2109 | CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, | |||
2110 | bool isVarArg) const { | |||
2111 | return CCAssignFnForNode(CC, true, isVarArg); | |||
2112 | } | |||
2113 | ||||
2114 | /// CCAssignFnForNode - Selects the correct CCAssignFn for the given | |||
2115 | /// CallingConvention. | |||
2116 | CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, | |||
2117 | bool Return, | |||
2118 | bool isVarArg) const { | |||
2119 | switch (getEffectiveCallingConv(CC, isVarArg)) { | |||
2120 | default: | |||
2121 | report_fatal_error("Unsupported calling convention"); | |||
2122 | case CallingConv::ARM_APCS: | |||
2123 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); | |||
2124 | case CallingConv::ARM_AAPCS: | |||
2125 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); | |||
2126 | case CallingConv::ARM_AAPCS_VFP: | |||
2127 | return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); | |||
2128 | case CallingConv::Fast: | |||
2129 | return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); | |||
2130 | case CallingConv::GHC: | |||
2131 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); | |||
2132 | case CallingConv::PreserveMost: | |||
2133 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); | |||
2134 | case CallingConv::CFGuard_Check: | |||
2135 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check); | |||
2136 | } | |||
2137 | } | |||
2138 | ||||
2139 | SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, | |||
2140 | MVT LocVT, MVT ValVT, SDValue Val) const { | |||
2141 | Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()), | |||
2142 | Val); | |||
2143 | if (Subtarget->hasFullFP16()) { | |||
2144 | Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val); | |||
2145 | } else { | |||
2146 | Val = DAG.getNode(ISD::TRUNCATE, dl, | |||
2147 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); | |||
2148 | Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val); | |||
2149 | } | |||
2150 | return Val; | |||
2151 | } | |||
2152 | ||||
2153 | SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, | |||
2154 | MVT LocVT, MVT ValVT, | |||
2155 | SDValue Val) const { | |||
2156 | if (Subtarget->hasFullFP16()) { | |||
2157 | Val = DAG.getNode(ARMISD::VMOVrh, dl, | |||
2158 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); | |||
2159 | } else { | |||
2160 | Val = DAG.getNode(ISD::BITCAST, dl, | |||
2161 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); | |||
2162 | Val = DAG.getNode(ISD::ZERO_EXTEND, dl, | |||
2163 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); | |||
2164 | } | |||
2165 | return DAG.getNode(ISD::BITCAST, dl, LocVT, Val); | |||
2166 | } | |||
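| // A sketch of the two paths above, for an f16 value in the low half of a | |||
| // 32-bit location: with full fp16 a single VMOVhr/VMOVrh transfers between | |||
| // a GPR and a half-precision FP register; without it the value round-trips | |||
| // through integer bitcasts plus a truncate or zero-extend of the container. | |||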
2167 | ||||
2168 | /// LowerCallResult - Lower the result values of a call into the | |||
2169 | /// appropriate copies out of appropriate physical registers. | |||
2170 | SDValue ARMTargetLowering::LowerCallResult( | |||
2171 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, | |||
2172 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | |||
2173 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, | |||
2174 | SDValue ThisVal) const { | |||
2175 | // Assign locations to each value returned by this call. | |||
2176 | SmallVector<CCValAssign, 16> RVLocs; | |||
2177 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, | |||
2178 | *DAG.getContext()); | |||
2179 | CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); | |||
2180 | ||||
2181 | // Copy all of the result registers out of their specified physreg. | |||
2182 | for (unsigned i = 0; i != RVLocs.size(); ++i) { | |||
2183 | CCValAssign VA = RVLocs[i]; | |||
2184 | ||||
2185 | // Pass 'this' value directly from the argument to return value, to avoid | |||
2186 | // reg unit interference | |||
2187 | if (i == 0 && isThisReturn) { | |||
2188 | assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && | |||
2189 | "unexpected return calling convention register assignment"); | |||
2190 | InVals.push_back(ThisVal); | |||
2191 | continue; | |||
2192 | } | |||
2193 | ||||
2194 | SDValue Val; | |||
2195 | if (VA.needsCustom() && | |||
2196 | (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) { | |||
2197 | // Handle f64 or half of a v2f64. | |||
2198 | SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, | |||
2199 | InFlag); | |||
2200 | Chain = Lo.getValue(1); | |||
2201 | InFlag = Lo.getValue(2); | |||
2202 | VA = RVLocs[++i]; // skip ahead to next loc | |||
2203 | SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, | |||
2204 | InFlag); | |||
2205 | Chain = Hi.getValue(1); | |||
2206 | InFlag = Hi.getValue(2); | |||
2207 | if (!Subtarget->isLittle()) | |||
2208 | std::swap (Lo, Hi); | |||
2209 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); | |||
2210 | ||||
2211 | if (VA.getLocVT() == MVT::v2f64) { | |||
2212 | SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); | |||
2213 | Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, | |||
2214 | DAG.getConstant(0, dl, MVT::i32)); | |||
2215 | ||||
2216 | VA = RVLocs[++i]; // skip ahead to next loc | |||
2217 | Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); | |||
2218 | Chain = Lo.getValue(1); | |||
2219 | InFlag = Lo.getValue(2); | |||
2220 | VA = RVLocs[++i]; // skip ahead to next loc | |||
2221 | Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); | |||
2222 | Chain = Hi.getValue(1); | |||
2223 | InFlag = Hi.getValue(2); | |||
2224 | if (!Subtarget->isLittle()) | |||
2225 | std::swap (Lo, Hi); | |||
2226 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); | |||
2227 | Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, | |||
2228 | DAG.getConstant(1, dl, MVT::i32)); | |||
2229 | } | |||
2230 | } else { | |||
2231 | Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), | |||
2232 | InFlag); | |||
2233 | Chain = Val.getValue(1); | |||
2234 | InFlag = Val.getValue(2); | |||
2235 | } | |||
2236 | ||||
2237 | switch (VA.getLocInfo()) { | |||
2238 | default: llvm_unreachable("Unknown loc info!"); | |||
2239 | case CCValAssign::Full: break; | |||
2240 | case CCValAssign::BCvt: | |||
2241 | Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); | |||
2242 | break; | |||
2243 | } | |||
2244 | ||||
2245 | // f16 arguments have their size extended to 4 bytes and passed as if they | |||
2246 | // had been copied to the LSBs of a 32-bit register. | |||
2247 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) | |||
2248 | if (VA.needsCustom() && | |||
2249 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) | |||
2250 | Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val); | |||
2251 | ||||
2252 | InVals.push_back(Val); | |||
2253 | } | |||
2254 | ||||
2255 | return Chain; | |||
2256 | } | |||
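// Illustrative sketch, not part of the original source: for a soft-float
// call returning an f64, AnalyzeCallResult assigns two i32 locations
// (typically r0/r1), and the loop above rebuilds the value roughly as
//
//   SDValue Lo = DAG.getCopyFromReg(Chain, dl, ARM::R0, MVT::i32, InFlag);
//   SDValue Hi = DAG.getCopyFromReg(Lo.getValue(1), dl, ARM::R1, MVT::i32,
//                                   Lo.getValue(2));
//   SDValue F64 = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
//
// with Lo and Hi swapped on big-endian subtargets.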
2257 | ||||
2258 | std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg( | |||
2259 | const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr, | |||
2260 | bool IsTailCall, int SPDiff) const { | |||
2261 | SDValue DstAddr; | |||
2262 | MachinePointerInfo DstInfo; | |||
2263 | int32_t Offset = VA.getLocMemOffset(); | |||
2264 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2265 | ||||
2266 | if (IsTailCall) { | |||
2267 | Offset += SPDiff; | |||
2268 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
2269 | int Size = VA.getLocVT().getFixedSizeInBits() / 8; | |||
2270 | int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); | |||
2271 | DstAddr = DAG.getFrameIndex(FI, PtrVT); | |||
2272 | DstInfo = | |||
2273 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); | |||
2274 | } else { | |||
2275 | SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); | |||
2276 | DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | |||
2277 | StackPtr, PtrOff); | |||
2278 | DstInfo = | |||
2279 | MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset); | |||
2280 | } | |||
2281 | ||||
2282 | return std::make_pair(DstAddr, DstInfo); | |||
2283 | } | |||
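// Worked example (illustrative, assuming the values shown): for a normal
// call with LocMemOffset 8 the destination is SP + 8, materialized with an
// ISD::ADD off the copied stack pointer; for a tail call with SPDiff -16
// the offset becomes 8 - 16 = -8 and a fixed frame object is created there
// so the store resolves correctly after the stack adjustment.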
2284 | ||||
2285 | void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, | |||
2286 | SDValue Chain, SDValue &Arg, | |||
2287 | RegsToPassVector &RegsToPass, | |||
2288 | CCValAssign &VA, CCValAssign &NextVA, | |||
2289 | SDValue &StackPtr, | |||
2290 | SmallVectorImpl<SDValue> &MemOpChains, | |||
2291 | bool IsTailCall, | |||
2292 | int SPDiff) const { | |||
2293 | SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, | |||
2294 | DAG.getVTList(MVT::i32, MVT::i32), Arg); | |||
2295 | unsigned id = Subtarget->isLittle() ? 0 : 1; | |||
2296 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); | |||
2297 | ||||
2298 | if (NextVA.isRegLoc()) | |||
2299 | RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); | |||
2300 | else { | |||
2301 | assert(NextVA.isMemLoc()); | |||
2302 | if (!StackPtr.getNode()) | |||
2303 | StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, | |||
2304 | getPointerTy(DAG.getDataLayout())); | |||
2305 | ||||
2306 | SDValue DstAddr; | |||
2307 | MachinePointerInfo DstInfo; | |||
2308 | std::tie(DstAddr, DstInfo) = | |||
2309 | computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff); | |||
2310 | MemOpChains.push_back( | |||
2311 | DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo)); | |||
2312 | } | |||
2313 | } | |||
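// Illustrative sketch: ARMISD::VMOVRRD splits the f64 into two i32 halves;
// on little-endian targets getValue(0) is the low word. For a hypothetical
// f64 assigned to r2/r3, RegsToPass ends up holding (r2, lo32) and
// (r3, hi32). If only the first register is available, the other half is
// instead stored to the stack slot computed by computeAddrForCallArg.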
2314 | ||||
2315 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { | |||
2316 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || | |||
2317 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; | |||
2318 | } | |||
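// Example (illustrative): fastcc under -tailcallopt, tailcc, and
// swifttailcc calls all guarantee TCO; a plain ccc call never does.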
2319 | ||||
2320 | /// LowerCall - Lowering a call into a callseq_start <- | |||
2321 | /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter | |||
2322 | /// nodes. | |||
2323 | SDValue | |||
2324 | ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||
2325 | SmallVectorImpl<SDValue> &InVals) const { | |||
2326 | SelectionDAG &DAG = CLI.DAG; | |||
2327 | SDLoc &dl = CLI.DL; | |||
2328 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | |||
2329 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | |||
2330 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | |||
2331 | SDValue Chain = CLI.Chain; | |||
2332 | SDValue Callee = CLI.Callee; | |||
2333 | bool &isTailCall = CLI.IsTailCall; | |||
2334 | CallingConv::ID CallConv = CLI.CallConv; | |||
2335 | bool doesNotRet = CLI.DoesNotReturn; | |||
2336 | bool isVarArg = CLI.IsVarArg; | |||
2337 | ||||
2338 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2339 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
2340 | MachineFunction::CallSiteInfo CSInfo; | |||
2341 | bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); | |||
2342 | bool isThisReturn = false; | |||
2343 | bool isCmseNSCall = false; | |||
2344 | bool isSibCall = false; | |||
2345 | bool PreferIndirect = false; | |||
2346 | bool GuardWithBTI = false; | |||
2347 | ||||
2348 | // Lower 'returns_twice' calls to a pseudo-instruction. | |||
2349 | if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && | |||
2350 | !Subtarget->noBTIAtReturnTwice()) | |||
2351 | GuardWithBTI = AFI->branchTargetEnforcement(); | |||
2352 | ||||
2353 | // Determine whether this is a non-secure function call. | |||
2354 | if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call")) | |||
2355 | isCmseNSCall = true; | |||
2356 | ||||
2357 | // Disable tail calls if they're not supported. | |||
2358 | if (!Subtarget->supportsTailCall()) | |||
2359 | isTailCall = false; | |||
2360 | ||||
2361 | // For both the non-secure calls and the returns from a CMSE entry function, | |||
2362 | // the function needs to do some extra work after the call, or before the | |||
2363 | // return, respectively; thus it cannot end with a tail call. | |||
2364 | if (isCmseNSCall || AFI->isCmseNSEntryFunction()) | |||
2365 | isTailCall = false; | |||
2366 | ||||
2367 | if (isa<GlobalAddressSDNode>(Callee)) { | |||
2368 | // If we're optimizing for minimum size and the function is called three or | |||
2369 | // more times in this block, we can improve codesize by calling indirectly | |||
2370 | // as BLXr has a 16-bit encoding. | |||
2371 | auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); | |||
2372 | if (CLI.CB) { | |||
2373 | auto *BB = CLI.CB->getParent(); | |||
2374 | PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() && | |||
2375 | count_if(GV->users(), [&BB](const User *U) { | |||
2376 | return isa<Instruction>(U) && | |||
2377 | cast<Instruction>(U)->getParent() == BB; | |||
2378 | }) > 2; | |||
2379 | } | |||
2380 | } | |||
2381 | if (isTailCall) { | |||
2382 | // Check if it's really possible to do a tail call. | |||
2383 | isTailCall = IsEligibleForTailCallOptimization( | |||
2384 | Callee, CallConv, isVarArg, isStructRet, | |||
2385 | MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG, | |||
2386 | PreferIndirect); | |||
2387 | ||||
2388 | if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt && | |||
2389 | CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail) | |||
2390 | isSibCall = true; | |||
2391 | ||||
2392 | // We don't support GuaranteedTailCallOpt for ARM, only automatically | |||
2393 | // detected sibcalls. | |||
2394 | if (isTailCall) | |||
2395 | ++NumTailCalls; | |||
2396 | } | |||
2397 | ||||
2398 | if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall()) | |||
2399 | report_fatal_error("failed to perform tail call elimination on a call " | |||
2400 | "site marked musttail"); | |||
2401 | // Analyze operands of the call, assigning locations to each operand. | |||
2402 | SmallVector<CCValAssign, 16> ArgLocs; | |||
2403 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, | |||
2404 | *DAG.getContext()); | |||
2405 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); | |||
2406 | ||||
2407 | // Get a count of how many bytes are to be pushed on the stack. | |||
2408 | unsigned NumBytes = CCInfo.getNextStackOffset(); | |||
2409 | ||||
2410 | // SPDiff is the byte offset of the call's argument area from the callee's. | |||
2411 | // Stores to callee stack arguments will be placed in FixedStackSlots offset | |||
2412 | // by this amount for a tail call. In a sibling call it must be 0 because the | |||
2413 | // caller will deallocate the entire stack and the callee still expects its | |||
2414 | // arguments to begin at SP+0. Completely unused for non-tail calls. | |||
2415 | int SPDiff = 0; | |||
2416 | ||||
2417 | if (isTailCall && !isSibCall) { | |||
2418 | auto FuncInfo = MF.getInfo<ARMFunctionInfo>(); | |||
2419 | unsigned NumReusableBytes = FuncInfo->getArgumentStackSize(); | |||
2420 | ||||
2421 | // Since callee will pop argument stack as a tail call, we must keep the | |||
2422 | // popped size 16-byte aligned. | |||
2423 | Align StackAlign = DAG.getDataLayout().getStackAlignment(); | |||
2424 | NumBytes = alignTo(NumBytes, StackAlign); | |||
2425 | ||||
2426 | // SPDiff will be negative if this tail call requires more space than we | |||
2427 | // would automatically have in our incoming argument space. Positive if we | |||
2428 | // can actually shrink the stack. | |||
2429 | SPDiff = NumReusableBytes - NumBytes; | |||
2430 | ||||
2431 | // If this call requires more stack than we have available from | |||
2432 | // LowerFormalArguments, tell FrameLowering to reserve space for it. | |||
2433 | if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff) | |||
2434 | AFI->setArgRegsSaveSize(-SPDiff); | |||
2435 | } | |||
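// Worked example (illustrative, assuming an 8-byte stack alignment): if the
// caller received 8 bytes of stack arguments (NumReusableBytes = 8) and this
// tail call needs 24, SPDiff = 8 - 24 = -16, so frame lowering is told to
// reserve 16 extra bytes via setArgRegsSaveSize.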
2436 | ||||
2437 | if (isSibCall) { | |||
2438 | // For sibling tail calls, memory operands are available in our caller's stack. | |||
2439 | NumBytes = 0; | |||
2440 | } else { | |||
2441 | // Adjust the stack pointer for the new arguments... | |||
2442 | // These operations are automatically eliminated by the prolog/epilog pass | |||
2443 | Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl); | |||
2444 | } | |||
2445 | ||||
2446 | SDValue StackPtr = | |||
2447 | DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); | |||
2448 | ||||
2449 | RegsToPassVector RegsToPass; | |||
2450 | SmallVector<SDValue, 8> MemOpChains; | |||
2451 | ||||
2452 | // During a tail call, stores to the argument area must happen after all of | |||
2453 | // the function's incoming arguments have been loaded because they may alias. | |||
2454 | // This is done by folding in a TokenFactor from LowerFormalArguments, but | |||
2455 | // there's no point in doing so repeatedly so this tracks whether that's | |||
2456 | // happened yet. | |||
2457 | bool AfterFormalArgLoads = false; | |||
2458 | ||||
2459 | // Walk the register/memloc assignments, inserting copies/loads. In the case | |||
2460 | // of tail call optimization, arguments are handled later. | |||
2461 | for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); | |||
2462 | i != e; | |||
2463 | ++i, ++realArgIdx) { | |||
2464 | CCValAssign &VA = ArgLocs[i]; | |||
2465 | SDValue Arg = OutVals[realArgIdx]; | |||
2466 | ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; | |||
2467 | bool isByVal = Flags.isByVal(); | |||
2468 | ||||
2469 | // Promote the value if needed. | |||
2470 | switch (VA.getLocInfo()) { | |||
2471 | default: llvm_unreachable("Unknown loc info!"); | |||
2472 | case CCValAssign::Full: break; | |||
2473 | case CCValAssign::SExt: | |||
2474 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); | |||
2475 | break; | |||
2476 | case CCValAssign::ZExt: | |||
2477 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); | |||
2478 | break; | |||
2479 | case CCValAssign::AExt: | |||
2480 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); | |||
2481 | break; | |||
2482 | case CCValAssign::BCvt: | |||
2483 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | |||
2484 | break; | |||
2485 | } | |||
2486 | ||||
2487 | if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) { | |||
2488 | Chain = DAG.getStackArgumentTokenFactor(Chain); | |||
2489 | AfterFormalArgLoads = true; | |||
2490 | } | |||
2491 | ||||
2492 | // f16 arguments have their size extended to 4 bytes and passed as if they | |||
2493 | // had been copied to the LSBs of a 32-bit register. | |||
2494 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) | |||
2495 | if (VA.needsCustom() && | |||
2496 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { | |||
2497 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); | |||
2498 | } else { | |||
2499 | // f16 arguments could have been extended prior to argument lowering. | |||
2500 | // Mask such arguments if this is a CMSE nonsecure call. | |||
2501 | auto ArgVT = Outs[realArgIdx].ArgVT; | |||
2502 | if (isCmseNSCall && (ArgVT == MVT::f16)) { | |||
2503 | auto LocBits = VA.getLocVT().getSizeInBits(); | |||
2504 | auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits()); | |||
2505 | SDValue Mask = | |||
2506 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); | |||
2507 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); | |||
2508 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); | |||
2509 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | |||
2510 | } | |||
2511 | } | |||
2512 | ||||
2513 | // f64 and v2f64 might be passed in i32 pairs and must be split into pieces | |||
2514 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { | |||
2515 | SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | |||
2516 | DAG.getConstant(0, dl, MVT::i32)); | |||
2517 | SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | |||
2518 | DAG.getConstant(1, dl, MVT::i32)); | |||
2519 | ||||
2520 | PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], | |||
2521 | StackPtr, MemOpChains, isTailCall, SPDiff); | |||
2522 | ||||
2523 | VA = ArgLocs[++i]; // skip ahead to next loc | |||
2524 | if (VA.isRegLoc()) { | |||
2525 | PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], | |||
2526 | StackPtr, MemOpChains, isTailCall, SPDiff); | |||
2527 | } else { | |||
2528 | assert(VA.isMemLoc()); | |||
2529 | SDValue DstAddr; | |||
2530 | MachinePointerInfo DstInfo; | |||
2531 | std::tie(DstAddr, DstInfo) = | |||
2532 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | |||
2533 | MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo)); | |||
2534 | } | |||
2535 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { | |||
2536 | PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], | |||
2537 | StackPtr, MemOpChains, isTailCall, SPDiff); | |||
2538 | } else if (VA.isRegLoc()) { | |||
2539 | if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && | |||
2540 | Outs[0].VT == MVT::i32) { | |||
2541 | assert(VA.getLocVT() == MVT::i32 && | |||
2542 | "unexpected calling convention register assignment"); | |||
2543 | assert(!Ins.empty() && Ins[0].VT == MVT::i32 && | |||
2544 | "unexpected use of 'returned'"); | |||
2545 | isThisReturn = true; | |||
2546 | } | |||
2547 | const TargetOptions &Options = DAG.getTarget().Options; | |||
2548 | if (Options.EmitCallSiteInfo) | |||
2549 | CSInfo.emplace_back(VA.getLocReg(), i); | |||
2550 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); | |||
2551 | } else if (isByVal) { | |||
2552 | assert(VA.isMemLoc()); | |||
2553 | unsigned offset = 0; | |||
2554 | ||||
2555 | // True if this byval aggregate will be split between registers | |||
2556 | // and memory. | |||
2557 | unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); | |||
2558 | unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); | |||
2559 | ||||
2560 | if (CurByValIdx < ByValArgsCount) { | |||
2561 | ||||
2562 | unsigned RegBegin, RegEnd; | |||
2563 | CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); | |||
2564 | ||||
2565 | EVT PtrVT = | |||
2566 | DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); | |||
2567 | unsigned int i, j; | |||
2568 | for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { | |||
2569 | SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); | |||
2570 | SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); | |||
2571 | SDValue Load = | |||
2572 | DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), | |||
2573 | DAG.InferPtrAlign(AddArg)); | |||
2574 | MemOpChains.push_back(Load.getValue(1)); | |||
2575 | RegsToPass.push_back(std::make_pair(j, Load)); | |||
2576 | } | |||
2577 | ||||
2578 | // If the parameter size extends beyond the register area, the "offset" | |||
2579 | // value helps us calculate the stack slot for the remaining part properly. | |||
2580 | offset = RegEnd - RegBegin; | |||
2581 | ||||
2582 | CCInfo.nextInRegsParam(); | |||
2583 | } | |||
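// Illustrative example: a 20-byte byval assigned registers r1..r3 loads
// three words here (offset = 3), leaving 20 - 12 = 8 bytes for the
// COPY_STRUCT_BYVAL node emitted below.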
2584 | ||||
2585 | if (Flags.getByValSize() > 4*offset) { | |||
2586 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
2587 | SDValue Dst; | |||
2588 | MachinePointerInfo DstInfo; | |||
2589 | std::tie(Dst, DstInfo) = | |||
2590 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | |||
2591 | SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); | |||
2592 | SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); | |||
2593 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, | |||
2594 | MVT::i32); | |||
2595 | SDValue AlignNode = | |||
2596 | DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32); | |||
2597 | ||||
2598 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
2599 | SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; | |||
2600 | MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, | |||
2601 | Ops)); | |||
2602 | } | |||
2603 | } else { | |||
2604 | assert(VA.isMemLoc()); | |||
2605 | SDValue DstAddr; | |||
2606 | MachinePointerInfo DstInfo; | |||
2607 | std::tie(DstAddr, DstInfo) = | |||
2608 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | |||
2609 | ||||
2610 | SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo); | |||
2611 | MemOpChains.push_back(Store); | |||
2612 | } | |||
2613 | } | |||
2614 | ||||
2615 | if (!MemOpChains.empty()) | |||
2616 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); | |||
2617 | ||||
2618 | // Build a sequence of copy-to-reg nodes chained together with token chain | |||
2619 | // and flag operands which copy the outgoing args into the appropriate regs. | |||
2620 | SDValue InFlag; | |||
2621 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { | |||
2622 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, | |||
2623 | RegsToPass[i].second, InFlag); | |||
2624 | InFlag = Chain.getValue(1); | |||
2625 | } | |||
2626 | ||||
2627 | // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every | |||
2628 | // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol | |||
2629 | // node so that legalize doesn't hack it. | |||
2630 | bool isDirect = false; | |||
2631 | ||||
2632 | const TargetMachine &TM = getTargetMachine(); | |||
2633 | const Module *Mod = MF.getFunction().getParent(); | |||
2634 | const GlobalValue *GVal = nullptr; | |||
2635 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) | |||
2636 | GVal = G->getGlobal(); | |||
2637 | bool isStub = | |||
2638 | !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO(); | |||
2639 | ||||
2640 | bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); | |||
2641 | bool isLocalARMFunc = false; | |||
2642 | auto PtrVt = getPointerTy(DAG.getDataLayout()); | |||
2643 | ||||
2644 | if (Subtarget->genLongCalls()) { | |||
2645 | assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && | |||
2646 | "long-calls codegen is not position independent!"); | |||
2647 | // Handle a global address or an external symbol. If it's not one of | |||
2648 | // those, the target's already in a register, so we don't need to do | |||
2649 | // anything extra. | |||
2650 | if (isa<GlobalAddressSDNode>(Callee)) { | |||
2651 | // When generating execute-only code we use movw movt pair. | |||
2652 | // Currently execute-only is only available for architectures that | |||
2653 | // support movw movt, so we are safe to assume that. | |||
2654 | if (Subtarget->genExecuteOnly()) { | |||
2655 | assert(Subtarget->useMovt() && | |||
2656 | "long-calls with execute-only requires movt and movw!"); | |||
2657 | ++NumMovwMovt; | |||
2658 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, | |||
2659 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt)); | |||
2660 | } else { | |||
2661 | // Create a constant pool entry for the callee address | |||
2662 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
2663 | ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( | |||
2664 | GVal, ARMPCLabelIndex, ARMCP::CPValue, 0); | |||
2665 | ||||
2666 | // Get the address of the callee into a register | |||
2667 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | |||
2668 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); | |||
2669 | Callee = DAG.getLoad( | |||
2670 | PtrVt, dl, DAG.getEntryNode(), Addr, | |||
2671 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
2672 | } | |||
2673 | } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { | |||
2674 | const char *Sym = S->getSymbol(); | |||
2675 | ||||
2676 | // When generating execute-only code we use movw movt pair. | |||
2677 | // Currently execute-only is only available for architectures that | |||
2678 | // support movw movt, so we are safe to assume that. | |||
2679 | if (Subtarget->genExecuteOnly()) { | |||
2680 | assert(Subtarget->useMovt() && | |||
2681 | "long-calls with execute-only requires movt and movw!"); | |||
2682 | ++NumMovwMovt; | |||
2683 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, | |||
2684 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt)); | |||
2685 | } else { | |||
2686 | // Create a constant pool entry for the callee address | |||
2687 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
2688 | ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( | |||
2689 | *DAG.getContext(), Sym, ARMPCLabelIndex, 0); | |||
2690 | ||||
2691 | // Get the address of the callee into a register | |||
2692 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | |||
2693 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); | |||
2694 | Callee = DAG.getLoad( | |||
2695 | PtrVt, dl, DAG.getEntryNode(), Addr, | |||
2696 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
2697 | } | |||
2698 | } | |||
2699 | } else if (isa<GlobalAddressSDNode>(Callee)) { | |||
2700 | if (!PreferIndirect) { | |||
2701 | isDirect = true; | |||
2702 | bool isDef = GVal->isStrongDefinitionForLinker(); | |||
2703 | ||||
2704 | // ARM call to a local ARM function is predicable. | |||
2705 | isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); | |||
2706 | // tBX takes a register source operand. | |||
2707 | if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { | |||
2708 | assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); | |||
2709 | Callee = DAG.getNode( | |||
2710 | ARMISD::WrapperPIC, dl, PtrVt, | |||
2711 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY)); | |||
2712 | Callee = DAG.getLoad( | |||
2713 | PtrVt, dl, DAG.getEntryNode(), Callee, | |||
2714 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), | |||
2715 | MachineMemOperand::MODereferenceable | | |||
2716 | MachineMemOperand::MOInvariant); | |||
2717 | } else if (Subtarget->isTargetCOFF()) { | |||
2718 | assert(Subtarget->isTargetWindows() && | |||
2719 | "Windows is the only supported COFF target"); | |||
2720 | unsigned TargetFlags = ARMII::MO_NO_FLAG; | |||
2721 | if (GVal->hasDLLImportStorageClass()) | |||
2722 | TargetFlags = ARMII::MO_DLLIMPORT; | |||
2723 | else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal)) | |||
2724 | TargetFlags = ARMII::MO_COFFSTUB; | |||
2725 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0, | |||
2726 | TargetFlags); | |||
2727 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) | |||
2728 | Callee = | |||
2729 | DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), | |||
2730 | DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), | |||
2731 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | |||
2732 | } else { | |||
2733 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0); | |||
2734 | } | |||
2735 | } | |||
2736 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { | |||
2737 | isDirect = true; | |||
2738 | // tBX takes a register source operand. | |||
2739 | const char *Sym = S->getSymbol(); | |||
2740 | if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { | |||
2741 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
2742 | ARMConstantPoolValue *CPV = | |||
2743 | ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, | |||
2744 | ARMPCLabelIndex, 4); | |||
2745 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | |||
2746 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | |||
2747 | Callee = DAG.getLoad( | |||
2748 | PtrVt, dl, DAG.getEntryNode(), CPAddr, | |||
2749 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
2750 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | |||
2751 | Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); | |||
2752 | } else { | |||
2753 | Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); | |||
2754 | } | |||
2755 | } | |||
2756 | ||||
2757 | if (isCmseNSCall) { | |||
2758 | assert(!isARMFunc && !isDirect && | |||
2759 | "Cannot handle call to ARM function or direct call"); | |||
2760 | if (NumBytes > 0) { | |||
2761 | DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(), | |||
2762 | "call to non-secure function would " | |||
2763 | "require passing arguments on stack", | |||
2764 | dl.getDebugLoc()); | |||
2765 | DAG.getContext()->diagnose(Diag); | |||
2766 | } | |||
2767 | if (isStructRet) { | |||
2768 | DiagnosticInfoUnsupported Diag( | |||
2769 | DAG.getMachineFunction().getFunction(), | |||
2770 | "call to non-secure function would return value through pointer", | |||
2771 | dl.getDebugLoc()); | |||
2772 | DAG.getContext()->diagnose(Diag); | |||
2773 | } | |||
2774 | } | |||
2775 | ||||
2776 | // FIXME: handle tail calls differently. | |||
2777 | unsigned CallOpc; | |||
2778 | if (Subtarget->isThumb()) { | |||
2779 | if (GuardWithBTI) | |||
2780 | CallOpc = ARMISD::t2CALL_BTI; | |||
2781 | else if (isCmseNSCall) | |||
2782 | CallOpc = ARMISD::tSECALL; | |||
2783 | else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) | |||
2784 | CallOpc = ARMISD::CALL_NOLINK; | |||
2785 | else | |||
2786 | CallOpc = ARMISD::CALL; | |||
2787 | } else { | |||
2788 | if (!isDirect && !Subtarget->hasV5TOps()) | |||
2789 | CallOpc = ARMISD::CALL_NOLINK; | |||
2790 | else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && | |||
2791 | // Emit regular call when code size is the priority | |||
2792 | !Subtarget->hasMinSize()) | |||
2793 | // "mov lr, pc; b _foo" to avoid confusing the RSP | |||
2794 | CallOpc = ARMISD::CALL_NOLINK; | |||
2795 | else | |||
2796 | CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; | |||
2797 | } | |||
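// Illustrative: in ARM state, an indirect call on a pre-v5T core becomes
// ARMISD::CALL_NOLINK ("mov lr, pc; bx rN"), while a direct call to a local
// ARM function uses the predicable ARMISD::CALL_PRED.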
2798 | ||||
2799 | // We don't usually want to end the call-sequence here because we would tidy | |||
2800 | // the frame up *after* the call, however in the ABI-changing tail-call case | |||
2801 | // we've carefully laid out the parameters so that when sp is reset they'll be | |||
2802 | // in the correct location. | |||
2803 | if (isTailCall && !isSibCall) { | |||
2804 | Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl); | |||
2805 | InFlag = Chain.getValue(1); | |||
2806 | } | |||
2807 | ||||
2808 | std::vector<SDValue> Ops; | |||
2809 | Ops.push_back(Chain); | |||
2810 | Ops.push_back(Callee); | |||
2811 | ||||
2812 | if (isTailCall) { | |||
2813 | Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32)); | |||
2814 | } | |||
2815 | ||||
2816 | // Add argument registers to the end of the list so that they are known live | |||
2817 | // into the call. | |||
2818 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) | |||
2819 | Ops.push_back(DAG.getRegister(RegsToPass[i].first, | |||
2820 | RegsToPass[i].second.getValueType())); | |||
2821 | ||||
2822 | // Add a register mask operand representing the call-preserved registers. | |||
2823 | const uint32_t *Mask; | |||
2824 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); | |||
2825 | if (isThisReturn) { | |||
2826 | // For 'this' returns, use the R0-preserving mask if applicable | |||
2827 | Mask = ARI->getThisReturnPreservedMask(MF, CallConv); | |||
2828 | if (!Mask) { | |||
2829 | // Set isThisReturn to false if the calling convention is not one that | |||
2830 | // allows 'returned' to be modeled in this way, so LowerCallResult does | |||
2831 | // not try to pass 'this' straight through | |||
2832 | isThisReturn = false; | |||
2833 | Mask = ARI->getCallPreservedMask(MF, CallConv); | |||
2834 | } | |||
2835 | } else | |||
2836 | Mask = ARI->getCallPreservedMask(MF, CallConv); | |||
2837 | ||||
2838 | assert(Mask && "Missing call preserved mask for calling convention"); | |||
2839 | Ops.push_back(DAG.getRegisterMask(Mask)); | |||
2840 | ||||
2841 | if (InFlag.getNode()) | |||
2842 | Ops.push_back(InFlag); | |||
2843 | ||||
2844 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); | |||
2845 | if (isTailCall) { | |||
2846 | MF.getFrameInfo().setHasTailCall(); | |||
2847 | SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); | |||
2848 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); | |||
2849 | return Ret; | |||
2850 | } | |||
2851 | ||||
2852 | // Returns a chain and a flag for retval copy to use. | |||
2853 | Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); | |||
2854 | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); | |||
2855 | InFlag = Chain.getValue(1); | |||
2856 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); | |||
2857 | ||||
2858 | // If we're guaranteeing tail-calls will be honoured, the callee must | |||
2859 | // pop its own argument stack on return. But this call is *not* a tail call so | |||
2860 | // we need to undo that after it returns to restore the status-quo. | |||
2861 | bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; | |||
2862 | uint64_t CalleePopBytes = | |||
2863 | canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL; | |||
2864 | ||||
2865 | Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl); | |||
2866 | if (!Ins.empty()) | |||
2867 | InFlag = Chain.getValue(1); | |||
2868 | ||||
2869 | // Handle result values, copying them out of physregs into vregs that we | |||
2870 | // return. | |||
2871 | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, | |||
2872 | InVals, isThisReturn, | |||
2873 | isThisReturn ? OutVals[0] : SDValue()); | |||
2874 | } | |||
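// Illustrative shape of the DAG produced above for an ordinary call:
//
//   callseq_start -> CopyToReg r0..r3 -> ARMISD::CALL -> callseq_end
//     -> CopyFromReg per return location (LowerCallResult)
//
// A tail call instead ends in ARMISD::TC_RETURN with the SPDiff operand.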
2875 | ||||
2876 | /// HandleByVal - Every parameter *after* a byval parameter is passed | |||
2877 | /// on the stack. Remember the next parameter register to allocate, | |||
2878 | /// and then confiscate the rest of the parameter registers to ensure | |||
2879 | /// this. | |||
2880 | void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, | |||
2881 | Align Alignment) const { | |||
2882 | // Byval (as with any stack) slots are always at least 4 byte aligned. | |||
2883 | Alignment = std::max(Alignment, Align(4)); | |||
2884 | ||||
2885 | unsigned Reg = State->AllocateReg(GPRArgRegs); | |||
2886 | if (!Reg) | |||
2887 | return; | |||
2888 | ||||
2889 | unsigned AlignInRegs = Alignment.value() / 4; | |||
2890 | unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; | |||
2891 | for (unsigned i = 0; i < Waste; ++i) | |||
2892 | Reg = State->AllocateReg(GPRArgRegs); | |||
2893 | ||||
2894 | if (!Reg) | |||
2895 | return; | |||
2896 | ||||
2897 | unsigned Excess = 4 * (ARM::R4 - Reg); | |||
2898 | ||||
2899 | // Special case when NSAA != SP and the parameter size is greater than the | |||
2900 | // size of all remaining GPR regs. In that case we can't split the parameter, | |||
2901 | // we must send it to the stack. We also must set NCRN to R4, so as to waste | |||
2902 | // all remaining registers. | |||
2903 | const unsigned NSAAOffset = State->getNextStackOffset(); | |||
2904 | if (NSAAOffset != 0 && Size > Excess) { | |||
2905 | while (State->AllocateReg(GPRArgRegs)) | |||
2906 | ; | |||
2907 | return; | |||
2908 | } | |||
2909 | ||||
2910 | // The first register for the byval parameter is the first register that | |||
2911 | // wasn't allocated before this method call, so it would be "reg". | |||
2912 | // If the parameter is small enough to be saved in the range [reg, r4), then | |||
2913 | // the end (first after last) register would be reg + param-size-in-regs; | |||
2914 | // else the parameter would be split between registers and stack, and the | |||
2915 | // end register would be r4 in this case. | |||
2916 | unsigned ByValRegBegin = Reg; | |||
2917 | unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); | |||
2918 | State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); | |||
2919 | // Note, the first register was already allocated at the beginning of the | |||
2920 | // function; allocate the remaining registers we need. | |||
2921 | for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) | |||
2922 | State->AllocateReg(GPRArgRegs); | |||
2923 | // A byval parameter that is split between registers and memory needs its | |||
2924 | // size truncated here. | |||
2925 | // In the case where the entire structure fits in registers, we set the | |||
2926 | // size in memory to zero. | |||
2927 | Size = std::max<int>(Size - Excess, 0); | |||
2928 | } | |||
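// Worked example (illustrative, assuming NSAA == SP): for a 24-byte byval
// with 8-byte alignment when r0 is already taken, AllocateReg returns r1,
// one register is wasted to reach the r2 boundary, Excess = 4 * (r4 - r2)
// = 8, so r2..r3 carry the first 8 bytes and Size is truncated to 16 bytes
// of stack.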
2929 | ||||
2930 | /// MatchingStackOffset - Return true if the given stack call argument is | |||
2931 | /// already available in the same position (relatively) of the caller's | |||
2932 | /// incoming argument stack. | |||
2933 | static | |||
2934 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, | |||
2935 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, | |||
2936 | const TargetInstrInfo *TII) { | |||
2937 | unsigned Bytes = Arg.getValueSizeInBits() / 8; | |||
2938 | int FI = std::numeric_limits<int>::max(); | |||
2939 | if (Arg.getOpcode() == ISD::CopyFromReg) { | |||
2940 | Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); | |||
2941 | if (!VR.isVirtual()) | |||
2942 | return false; | |||
2943 | MachineInstr *Def = MRI->getVRegDef(VR); | |||
2944 | if (!Def) | |||
2945 | return false; | |||
2946 | if (!Flags.isByVal()) { | |||
2947 | if (!TII->isLoadFromStackSlot(*Def, FI)) | |||
2948 | return false; | |||
2949 | } else { | |||
2950 | return false; | |||
2951 | } | |||
2952 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { | |||
2953 | if (Flags.isByVal()) | |||
2954 | // ByVal argument is passed in as a pointer but it's now being | |||
2955 | // dereferenced. e.g. | |||
2956 | // define @foo(%struct.X* %A) { | |||
2957 | // tail call @bar(%struct.X* byval %A) | |||
2958 | // } | |||
2959 | return false; | |||
2960 | SDValue Ptr = Ld->getBasePtr(); | |||
2961 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); | |||
2962 | if (!FINode) | |||
2963 | return false; | |||
2964 | FI = FINode->getIndex(); | |||
2965 | } else | |||
2966 | return false; | |||
2967 | ||||
2968 | assert(FI != std::numeric_limits<int>::max()); | |||
2969 | if (!MFI.isFixedObjectIndex(FI)) | |||
2970 | return false; | |||
2971 | return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); | |||
2972 | } | |||
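// Illustrative: if the outgoing argument is a load from a fixed frame
// object whose offset and size equal the callee-side (Offset, Bytes) pair,
// the value is already in place and the sibcall needs no store for it.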
2973 | ||||
2974 | /// IsEligibleForTailCallOptimization - Check whether the call is eligible | |||
2975 | /// for tail call optimization. Targets which want to do tail call | |||
2976 | /// optimization should implement this function. | |||
2977 | bool ARMTargetLowering::IsEligibleForTailCallOptimization( | |||
2978 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, | |||
2979 | bool isCalleeStructRet, bool isCallerStructRet, | |||
2980 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
2981 | const SmallVectorImpl<SDValue> &OutVals, | |||
2982 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG, | |||
2983 | const bool isIndirect) const { | |||
2984 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2985 | const Function &CallerF = MF.getFunction(); | |||
2986 | CallingConv::ID CallerCC = CallerF.getCallingConv(); | |||
2987 | ||||
2988 | assert(Subtarget->supportsTailCall()); | |||
2989 | ||||
2990 | // Indirect tail calls cannot be optimized for Thumb1 if the args | |||
2991 | // to the call take up r0-r3. The reason is that there are no legal registers | |||
2992 | // left to hold the pointer to the function to be called. | |||
2993 | // Similarly, if the function uses return address sign and authentication, | |||
2994 | // r12 is needed to hold the PAC and is not available to hold the callee | |||
2995 | // address. | |||
2996 | if (Outs.size() >= 4 && | |||
2997 | (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) { | |||
2998 | if (Subtarget->isThumb1Only()) | |||
2999 | return false; | |||
3000 | // Conservatively assume the function spills LR. | |||
3001 | if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)) | |||
3002 | return false; | |||
3003 | } | |||
3004 | ||||
3005 | // Look for obvious safe cases to perform tail call optimization that do not | |||
3006 | // require ABI changes. This is what gcc calls sibcall. | |||
3007 | ||||
3008 | // Exception-handling functions need a special set of instructions to indicate | |||
3009 | // a return to the hardware. Tail-calling another function would probably | |||
3010 | // break this. | |||
3011 | if (CallerF.hasFnAttribute("interrupt")) | |||
3012 | return false; | |||
3013 | ||||
3014 | if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) | |||
3015 | return CalleeCC == CallerCC; | |||
3016 | ||||
3017 | // Also avoid sibcall optimization if either caller or callee uses struct | |||
3018 | // return semantics. | |||
3019 | if (isCalleeStructRet || isCallerStructRet) | |||
3020 | return false; | |||
3021 | ||||
3022 | // Externally-defined functions with weak linkage should not be | |||
3023 | // tail-called on ARM when the OS does not support dynamic | |||
3024 | // pre-emption of symbols, as the AAELF spec requires normal calls | |||
3025 | // to undefined weak functions to be replaced with a NOP or jump to the | |||
3026 | // next instruction. The behaviour of branch instructions in this | |||
3027 | // situation (as used for tail calls) is implementation-defined, so we | |||
3028 | // cannot rely on the linker replacing the tail call with a return. | |||
3029 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { | |||
3030 | const GlobalValue *GV = G->getGlobal(); | |||
3031 | const Triple &TT = getTargetMachine().getTargetTriple(); | |||
3032 | if (GV->hasExternalWeakLinkage() && | |||
3033 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) | |||
3034 | return false; | |||
3035 | } | |||
3036 | ||||
3037 | // Check that the call results are passed in the same way. | |||
3038 | LLVMContext &C = *DAG.getContext(); | |||
3039 | if (!CCState::resultsCompatible( | |||
3040 | getEffectiveCallingConv(CalleeCC, isVarArg), | |||
3041 | getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, | |||
3042 | CCAssignFnForReturn(CalleeCC, isVarArg), | |||
3043 | CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) | |||
3044 | return false; | |||
3045 | // The callee has to preserve all registers the caller needs to preserve. | |||
3046 | const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); | |||
3047 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); | |||
3048 | if (CalleeCC != CallerCC) { | |||
3049 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); | |||
3050 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) | |||
3051 | return false; | |||
3052 | } | |||
3053 | ||||
3054 | // If Caller's vararg or byval argument has been split between registers and | |||
3055 | // stack, do not perform tail call, since part of the argument is in caller's | |||
3056 | // local frame. | |||
3057 | const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); | |||
3058 | if (AFI_Caller->getArgRegsSaveSize()) | |||
3059 | return false; | |||
3060 | ||||
3061 | // If the callee takes no arguments then go on to check the results of the | |||
3062 | // call. | |||
3063 | if (!Outs.empty()) { | |||
3064 | // Check if stack adjustment is needed. For now, do not do this if any | |||
3065 | // argument is passed on the stack. | |||
3066 | SmallVector<CCValAssign, 16> ArgLocs; | |||
3067 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); | |||
3068 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); | |||
3069 | if (CCInfo.getNextStackOffset()) { | |||
3070 | // Check if the arguments are already laid out in the right way as | |||
3071 | // the caller's fixed stack objects. | |||
3072 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
3073 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); | |||
3074 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); | |||
3075 | for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); | |||
3076 | i != e; | |||
3077 | ++i, ++realArgIdx) { | |||
3078 | CCValAssign &VA = ArgLocs[i]; | |||
3079 | EVT RegVT = VA.getLocVT(); | |||
3080 | SDValue Arg = OutVals[realArgIdx]; | |||
3081 | ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; | |||
3082 | if (VA.getLocInfo() == CCValAssign::Indirect) | |||
3083 | return false; | |||
3084 | if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { | |||
3085 | // f64 and vector types are split into multiple registers or | |||
3086 | // register/stack-slot combinations. The types will not match | |||
3087 | // the registers; give up on memory f64 refs until we figure | |||
3088 | // out what to do about this. | |||
3089 | if (!VA.isRegLoc()) | |||
3090 | return false; | |||
3091 | if (!ArgLocs[++i].isRegLoc()) | |||
3092 | return false; | |||
3093 | if (RegVT == MVT::v2f64) { | |||
3094 | if (!ArgLocs[++i].isRegLoc()) | |||
3095 | return false; | |||
3096 | if (!ArgLocs[++i].isRegLoc()) | |||
3097 | return false; | |||
3098 | } | |||
3099 | } else if (!VA.isRegLoc()) { | |||
3100 | if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, | |||
3101 | MFI, MRI, TII)) | |||
3102 | return false; | |||
3103 | } | |||
3104 | } | |||
3105 | } | |||
3106 | ||||
3107 | const MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
3108 | if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) | |||
3109 | return false; | |||
3110 | } | |||
3111 | ||||
3112 | return true; | |||
3113 | } | |||
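// Summary note (not in the original source): the checks above reject
// sibcalls for interrupt handlers, sret mismatches, external-weak callees,
// incompatible result conventions, callers with saved argument registers,
// and stack arguments that are not already laid out in place.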
3114 | ||||
3115 | bool | |||
3116 | ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, | |||
3117 | MachineFunction &MF, bool isVarArg, | |||
3118 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
3119 | LLVMContext &Context) const { | |||
3120 | SmallVector<CCValAssign, 16> RVLocs; | |||
3121 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); | |||
3122 | return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); | |||
3123 | } | |||
3124 | ||||
3125 | static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, | |||
3126 | const SDLoc &DL, SelectionDAG &DAG) { | |||
3127 | const MachineFunction &MF = DAG.getMachineFunction(); | |||
3128 | const Function &F = MF.getFunction(); | |||
3129 | ||||
3130 | StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); | |||
3131 | ||||
3132 | // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset | |||
3133 | // version of the "preferred return address". These offsets affect the return | |||
3134 | // instruction if this is a return from PL1 without hypervisor extensions. | |||
3135 | // IRQ/FIQ: +4 "subs pc, lr, #4" | |||
3136 | // SWI: 0 "subs pc, lr, #0" | |||
3137 | // ABORT: +4 "subs pc, lr, #4" | |||
3138 | // UNDEF: +4/+2 "subs pc, lr, #0" | |||
3139 | // UNDEF varies depending on whether the exception came from ARM or Thumb | |||
3140 | // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. | |||
3141 | ||||
3142 | int64_t LROffset; | |||
3143 | if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || | |||
3144 | IntKind == "ABORT") | |||
3145 | LROffset = 4; | |||
3146 | else if (IntKind == "SWI" || IntKind == "UNDEF") | |||
3147 | LROffset = 0; | |||
3148 | else | |||
3149 | report_fatal_error("Unsupported interrupt attribute. If present, value " | |||
3150 | "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); | |||
3151 | ||||
3152 | RetOps.insert(RetOps.begin() + 1, | |||
3153 | DAG.getConstant(LROffset, DL, MVT::i32, false)); | |||
3154 | ||||
3155 | return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); | |||
3156 | } | |||
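// Illustrative: a handler marked __attribute__((interrupt("IRQ"))) returns
// via "subs pc, lr, #4", while interrupt("SWI") returns via
// "subs pc, lr, #0", matching the offsets tabulated above.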
3157 | ||||
3158 | SDValue | |||
3159 | ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, | |||
3160 | bool isVarArg, | |||
3161 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
3162 | const SmallVectorImpl<SDValue> &OutVals, | |||
3163 | const SDLoc &dl, SelectionDAG &DAG) const { | |||
3164 | // CCValAssign - represent the assignment of the return value to a location. | |||
3165 | SmallVector<CCValAssign, 16> RVLocs; | |||
3166 | ||||
3167 | // CCState - Info about the registers and stack slots. | |||
3168 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, | |||
3169 | *DAG.getContext()); | |||
3170 | ||||
3171 | // Analyze outgoing return values. | |||
3172 | CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); | |||
3173 | ||||
3174 | SDValue Flag; | |||
3175 | SmallVector<SDValue, 4> RetOps; | |||
3176 | RetOps.push_back(Chain); // Operand #0 = Chain (updated below) | |||
3177 | bool isLittleEndian = Subtarget->isLittle(); | |||
3178 | ||||
3179 | MachineFunction &MF = DAG.getMachineFunction(); | |||
3180 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
3181 | AFI->setReturnRegsCount(RVLocs.size()); | |||
3182 | ||||
3183 | // Report error if cmse entry function returns structure through first ptr arg. | |||
3184 | if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) { | |||
3185 | // Note: using an empty SDLoc(), as the first line of the function is a | |||
3186 | // better place to report than the last line. | |||
3187 | DiagnosticInfoUnsupported Diag( | |||
3188 | DAG.getMachineFunction().getFunction(), | |||
3189 | "secure entry function would return value through pointer", | |||
3190 | SDLoc().getDebugLoc()); | |||
3191 | DAG.getContext()->diagnose(Diag); | |||
3192 | } | |||
3193 | ||||
3194 | // Copy the result values into the output registers. | |||
3195 | for (unsigned i = 0, realRVLocIdx = 0; | |||
3196 | i != RVLocs.size(); | |||
3197 | ++i, ++realRVLocIdx) { | |||
3198 | CCValAssign &VA = RVLocs[i]; | |||
3199 | assert(VA.isRegLoc() && "Can only return in registers!"); | |||
3200 | ||||
3201 | SDValue Arg = OutVals[realRVLocIdx]; | |||
3202 | bool ReturnF16 = false; | |||
3203 | ||||
3204 | if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) { | |||
3205 | // Half-precision return values can be returned like this: | |||
3206 | // | |||
3207 | // t11 f16 = fadd ... | |||
3208 | // t12: i16 = bitcast t11 | |||
3209 | // t13: i32 = zero_extend t12 | |||
3210 | // t14: f32 = bitcast t13 <~~~~~~~ Arg | |||
3211 | // | |||
3212 | // to avoid code generation for bitcasts, we simply set Arg to the node | |||
3213 | // that produces the f16 value, t11 in this case. | |||
3214 | // | |||
3215 | if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) { | |||
3216 | SDValue ZE = Arg.getOperand(0); | |||
3217 | if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) { | |||
3218 | SDValue BC = ZE.getOperand(0); | |||
3219 | if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) { | |||
3220 | Arg = BC.getOperand(0); | |||
3221 | ReturnF16 = true; | |||
3222 | } | |||
3223 | } | |||
3224 | } | |||
3225 | } | |||
3226 | ||||
3227 | switch (VA.getLocInfo()) { | |||
3228 | default: llvm_unreachable("Unknown loc info!"); | |||
3229 | case CCValAssign::Full: break; | |||
3230 | case CCValAssign::BCvt: | |||
3231 | if (!ReturnF16) | |||
3232 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | |||
3233 | break; | |||
3234 | } | |||
3235 | ||||
3236 | // Mask f16 arguments if this is a CMSE nonsecure entry. | |||
3237 | auto RetVT = Outs[realRVLocIdx].ArgVT; | |||
3238 | if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) { | |||
3239 | if (VA.needsCustom() && VA.getValVT() == MVT::f16) { | |||
3240 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); | |||
3241 | } else { | |||
3242 | auto LocBits = VA.getLocVT().getSizeInBits(); | |||
3243 | auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits()); | |||
3244 | SDValue Mask = | |||
3245 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); | |||
3246 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); | |||
3247 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); | |||
3248 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | |||
3249 | } | |||
3250 | } | |||
3251 | ||||
3252 | if (VA.needsCustom() && | |||
3253 | (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) { | |||
3254 | if (VA.getLocVT() == MVT::v2f64) { | |||
3255 | // Extract the first half and return it in two registers. | |||
3256 | SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | |||
3257 | DAG.getConstant(0, dl, MVT::i32)); | |||
3258 | SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, | |||
3259 | DAG.getVTList(MVT::i32, MVT::i32), Half); | |||
3260 | ||||
3261 | Chain = | |||
3262 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | |||
3263 | HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); | |||
3264 | Flag = Chain.getValue(1); | |||
3265 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | |||
3266 | VA = RVLocs[++i]; // skip ahead to next loc | |||
3267 | Chain = | |||
3268 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | |||
3269 | HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); | |||
3270 | Flag = Chain.getValue(1); | |||
3271 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | |||
3272 | VA = RVLocs[++i]; // skip ahead to next loc | |||
3273 | ||||
3274 | // Extract the 2nd half and fall through to handle it as an f64 value. | |||
3275 | Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | |||
3276 | DAG.getConstant(1, dl, MVT::i32)); | |||
3277 | } | |||
3278 | // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is | |||
3279 | // available. | |||
3280 | SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, | |||
3281 | DAG.getVTList(MVT::i32, MVT::i32), Arg); | |||
3282 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | |||
3283 | fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); | |||
3284 | Flag = Chain.getValue(1); | |||
3285 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | |||
3286 | VA = RVLocs[++i]; // skip ahead to next loc | |||
3287 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | |||
3288 | fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); | |||
3289 | } else | |||
3290 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); | |||
3291 | ||||
3292 | // Guarantee that all emitted copies are glued together, so that nothing | |||
3293 | // can be scheduled between the copies and the return they feed. | |||
3294 | Flag = Chain.getValue(1); | |||
3295 | RetOps.push_back(DAG.getRegister( | |||
3296 | VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT())); | |||
3297 | } | |||
3298 | const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); | |||
3299 | const MCPhysReg *I = | |||
3300 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); | |||
3301 | if (I) { | |||
3302 | for (; *I; ++I) { | |||
3303 | if (ARM::GPRRegClass.contains(*I)) | |||
3304 | RetOps.push_back(DAG.getRegister(*I, MVT::i32)); | |||
3305 | else if (ARM::DPRRegClass.contains(*I)) | |||
3306 | RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); | |||
3307 | else | |||
3308 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); | |||
3309 | } | |||
3310 | } | |||
3311 | ||||
3312 | // Update chain and glue. | |||
3313 | RetOps[0] = Chain; | |||
3314 | if (Flag.getNode()) | |||
3315 | RetOps.push_back(Flag); | |||
3316 | ||||
3317 | // CPUs which aren't M-class use a special sequence to return from | |||
3318 | // exceptions (roughly, any instruction setting pc and cpsr simultaneously, | |||
3319 | // though we use "subs pc, lr, #N"). | |||
3320 | // | |||
3321 | // M-class CPUs actually use a normal return sequence with a special | |||
3322 | // (hardware-provided) value in LR, so the normal code path works. | |||
3323 | if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && | |||
3324 | !Subtarget->isMClass()) { | |||
3325 | if (Subtarget->isThumb1Only()) | |||
3326 | report_fatal_error("interrupt attribute is not supported in Thumb1"); | |||
3327 | return LowerInterruptReturn(RetOps, dl, DAG); | |||
3328 | } | |||
3329 | ||||
3330 | ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG : | |||
3331 | ARMISD::RET_FLAG; | |||
3332 | return DAG.getNode(RetNode, dl, MVT::Other, RetOps); | |||
3333 | } | |||
3334 | ||||
3335 | bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { | |||
3336 | if (N->getNumValues() != 1) | |||
3337 | return false; | |||
3338 | if (!N->hasNUsesOfValue(1, 0)) | |||
3339 | return false; | |||
3340 | ||||
3341 | SDValue TCChain = Chain; | |||
3342 | SDNode *Copy = *N->use_begin(); | |||
3343 | if (Copy->getOpcode() == ISD::CopyToReg) { | |||
3344 | // If the copy has a glue operand, we conservatively assume it isn't safe to | |||
3345 | // perform a tail call. | |||
3346 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) | |||
3347 | return false; | |||
3348 | TCChain = Copy->getOperand(0); | |||
3349 | } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { | |||
3350 | SDNode *VMov = Copy; | |||
3351 | // f64 returned in a pair of GPRs. | |||
3352 | SmallPtrSet<SDNode*, 2> Copies; | |||
3353 | for (SDNode *U : VMov->uses()) { | |||
3354 | if (U->getOpcode() != ISD::CopyToReg) | |||
3355 | return false; | |||
3356 | Copies.insert(U); | |||
3357 | } | |||
3358 | if (Copies.size() > 2) | |||
3359 | return false; | |||
3360 | ||||
3361 | for (SDNode *U : VMov->uses()) { | |||
3362 | SDValue UseChain = U->getOperand(0); | |||
3363 | if (Copies.count(UseChain.getNode())) | |||
3364 | // Second CopyToReg | |||
3365 | Copy = U; | |||
3366 | else { | |||
3367 | // We are at the top of this chain. | |||
3368 | // If the copy has a glue operand, we conservatively assume it | |||
3369 | // isn't safe to perform a tail call. | |||
3370 | if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue) | |||
3371 | return false; | |||
3372 | // First CopyToReg | |||
3373 | TCChain = UseChain; | |||
3374 | } | |||
3375 | } | |||
3376 | } else if (Copy->getOpcode() == ISD::BITCAST) { | |||
3377 | // f32 returned in a single GPR. | |||
3378 | if (!Copy->hasOneUse()) | |||
3379 | return false; | |||
3380 | Copy = *Copy->use_begin(); | |||
3381 | if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) | |||
3382 | return false; | |||
3383 | // If the copy has a glue operand, we conservatively assume it isn't safe to | |||
3384 | // perform a tail call. | |||
3385 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) | |||
3386 | return false; | |||
3387 | TCChain = Copy->getOperand(0); | |||
3388 | } else { | |||
3389 | return false; | |||
3390 | } | |||
3391 | ||||
3392 | bool HasRet = false; | |||
3393 | for (const SDNode *U : Copy->uses()) { | |||
3394 | if (U->getOpcode() != ARMISD::RET_FLAG && | |||
3395 | U->getOpcode() != ARMISD::INTRET_FLAG) | |||
3396 | return false; | |||
3397 | HasRet = true; | |||
3398 | } | |||
3399 | ||||
3400 | if (!HasRet) | |||
3401 | return false; | |||
3402 | ||||
3403 | Chain = TCChain; | |||
3404 | return true; | |||
3405 | } | |||
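// [Editorial sketch, not in the original source] The simplest DAG shape the
// predicate above accepts is an i32 value copied straight into the return
// register, with the (glue-free) copy consumed only by the return node:
//
//   t1: ch = CopyToReg t0, Register:i32 $r0, tN     <~~~ Copy
//   t2: ch = ARMISD::RET_FLAG t1, Register:i32 $r0
//
// Chain is then rewritten to t0, the chain feeding the copy, so the caller
// can consider emitting tN's producer in tail-call position.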
3406 | ||||
3407 | bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { | |||
3408 | if (!Subtarget->supportsTailCall()) | |||
3409 | return false; | |||
3410 | ||||
3411 | if (!CI->isTailCall()) | |||
3412 | return false; | |||
3413 | ||||
3414 | return true; | |||
3415 | } | |||
3416 | ||||
3417 | // Writing a 64-bit value requires splitting it into two 32-bit values first, | |||
3418 | // then passing the low and high parts through. | |||
3419 | static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { | |||
3420 | SDLoc DL(Op); | |||
3421 | SDValue WriteValue = Op->getOperand(2); | |||
3422 | ||||
3423 | // This function is only supposed to be called for i64 type argument. | |||
3424 | assert(WriteValue.getValueType() == MVT::i64 | |||
3425 | && "LowerWRITE_REGISTER called for non-i64 type argument."); | |||
3426 | ||||
3427 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, | |||
3428 | DAG.getConstant(0, DL, MVT::i32)); | |||
3429 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, | |||
3430 | DAG.getConstant(1, DL, MVT::i32)); | |||
3431 | SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; | |||
3432 | return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); | |||
3433 | } | |||
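// [Editorial sketch, not in the original source] For an i64 write such as
//
//   call void @llvm.write_register.i64(metadata !"reg", i64 %v)
//
// the lowering above produces
//
//   Lo = EXTRACT_ELEMENT %v, 0        ; bits 31..0
//   Hi = EXTRACT_ELEMENT %v, 1        ; bits 63..32
//   WRITE_REGISTER ch, !"reg", Lo, Hi
//
// so that instruction selection only ever sees 32-bit register halves.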
3434 | ||||
3435 | // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as | |||
3436 | // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is | |||
3437 | // one of the above-mentioned nodes. It has to be wrapped because otherwise | |||
3438 | // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only | |||
3439 | // be used to form an addressing mode. These wrapped nodes will be selected | |||
3440 | // into MOVi. | |||
3441 | SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, | |||
3442 | SelectionDAG &DAG) const { | |||
3443 | EVT PtrVT = Op.getValueType(); | |||
3444 | // FIXME there is no actual debug info here | |||
3445 | SDLoc dl(Op); | |||
3446 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); | |||
3447 | SDValue Res; | |||
3448 | ||||
3449 | // When generating execute-only code, constant pools must be promoted to the | |||
3450 | // global data section. It's a bit ugly that we can't share them across basic | |||
3451 | // blocks, but this way we guarantee that execute-only behaves correctly with | |||
3452 | // position-independent addressing modes. | |||
3453 | if (Subtarget->genExecuteOnly()) { | |||
3454 | auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); | |||
3455 | auto T = const_cast<Type*>(CP->getType()); | |||
3456 | auto C = const_cast<Constant*>(CP->getConstVal()); | |||
3457 | auto M = const_cast<Module*>(DAG.getMachineFunction(). | |||
3458 | getFunction().getParent()); | |||
3459 | auto GV = new GlobalVariable( | |||
3460 | *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, | |||
3461 | Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + | |||
3462 | Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + | |||
3463 | Twine(AFI->createPICLabelUId()) | |||
3464 | ); | |||
3465 | SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), | |||
3466 | dl, PtrVT); | |||
3467 | return LowerGlobalAddress(GA, DAG); | |||
3468 | } | |||
3469 | ||||
3470 | // The 16-bit ADR instruction can only encode offsets that are multiples of 4, | |||
3471 | // so we need to align to at least 4 bytes when we don't have 32-bit ADR. | |||
3472 | Align CPAlign = CP->getAlign(); | |||
3473 | if (Subtarget->isThumb1Only()) | |||
3474 | CPAlign = std::max(CPAlign, Align(4)); | |||
3475 | if (CP->isMachineConstantPoolEntry()) | |||
3476 | Res = | |||
3477 | DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign); | |||
3478 | else | |||
3479 | Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign); | |||
3480 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); | |||
3481 | } | |||
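// [Editorial example, not in the original source] On the execute-only path
// above, with a private global prefix of ".L", function number 3 and PIC
// label id 7, the promoted entry would be emitted as an internal constant
// global named ".LCP3_7" in the data section and addressed through
// LowerGlobalAddress, rather than being placed in the (execute-only) text
// section.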
3482 | ||||
3483 | unsigned ARMTargetLowering::getJumpTableEncoding() const { | |||
3484 | return MachineJumpTableInfo::EK_Inline; | |||
3485 | } | |||
3486 | ||||
3487 | SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, | |||
3488 | SelectionDAG &DAG) const { | |||
3489 | MachineFunction &MF = DAG.getMachineFunction(); | |||
3490 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
3491 | unsigned ARMPCLabelIndex = 0; | |||
3492 | SDLoc DL(Op); | |||
3493 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3494 | const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); | |||
3495 | SDValue CPAddr; | |||
3496 | bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); | |||
3497 | if (!IsPositionIndependent) { | |||
3498 | CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4)); | |||
3499 | } else { | |||
3500 | unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; | |||
3501 | ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
3502 | ARMConstantPoolValue *CPV = | |||
3503 | ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, | |||
3504 | ARMCP::CPBlockAddress, PCAdj); | |||
3505 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
3506 | } | |||
3507 | CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); | |||
3508 | SDValue Result = DAG.getLoad( | |||
3509 | PtrVT, DL, DAG.getEntryNode(), CPAddr, | |||
3510 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3511 | if (!IsPositionIndependent) | |||
3512 | return Result; | |||
3513 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); | |||
3514 | return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); | |||
3515 | } | |||
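// [Editorial sketch, not in the original source] In the position-independent
// case above, the constant pool slot holds "BA - (.LPC0_0 + PCAdj)" and the
// PIC_ADD adds the pc back in, roughly (ARM mode, PCAdj == 8):
//
//   ldr   r0, .LCPI0_0
// .LPC0_0:
//   add   r0, pc, r0        ; r0 = address of the basic block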
3516 | ||||
3517 | /// Convert a TLS address reference into the correct sequence of loads | |||
3518 | /// and calls to compute the variable's address for Darwin, and return an | |||
3519 | /// SDValue containing the final node. | |||
3520 | ||||
3521 | /// Darwin only has one TLS scheme which must be capable of dealing with the | |||
3522 | /// fully general situation, in the worst case. This means: | |||
3523 | /// + "extern __thread" declaration. | |||
3524 | /// + Defined in a possibly unknown dynamic library. | |||
3525 | /// | |||
3526 | /// The general system is that each __thread variable has a [3 x i32] descriptor | |||
3527 | /// which contains information used by the runtime to calculate the address. The | |||
3528 | /// only part of this the compiler needs to know about is the first word, which | |||
3529 | /// contains a function pointer that must be called with the address of the | |||
3530 | /// entire descriptor in "r0". | |||
3531 | /// | |||
3532 | /// Since this descriptor may be in a different unit, in general access must | |||
3533 | /// proceed along the usual ARM rules. A common sequence to produce is: | |||
3534 | /// | |||
3535 | /// movw rT1, :lower16:_var$non_lazy_ptr | |||
3536 | /// movt rT1, :upper16:_var$non_lazy_ptr | |||
3537 | /// ldr r0, [rT1] | |||
3538 | /// ldr rT2, [r0] | |||
3539 | /// blx rT2 | |||
3540 | /// [...address now in r0...] | |||
3541 | SDValue | |||
3542 | ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, | |||
3543 | SelectionDAG &DAG) const { | |||
3544 | assert(Subtarget->isTargetDarwin() && | |||
3545 | "This function expects a Darwin target"); | |||
3546 | SDLoc DL(Op); | |||
3547 | ||||
3548 | // The first step is to get the address of the actual global symbol. This is | |||
3549 | // where the TLS descriptor lives. | |||
3550 | SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); | |||
3551 | ||||
3552 | // The first entry in the descriptor is a function pointer that we must call | |||
3553 | // to obtain the address of the variable. | |||
3554 | SDValue Chain = DAG.getEntryNode(); | |||
3555 | SDValue FuncTLVGet = DAG.getLoad( | |||
3556 | MVT::i32, DL, Chain, DescAddr, | |||
3557 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), | |||
3558 | MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | | |||
3559 | MachineMemOperand::MOInvariant); | |||
3560 | Chain = FuncTLVGet.getValue(1); | |||
3561 | ||||
3562 | MachineFunction &F = DAG.getMachineFunction(); | |||
3563 | MachineFrameInfo &MFI = F.getFrameInfo(); | |||
3564 | MFI.setAdjustsStack(true); | |||
3565 | ||||
3566 | // TLS calls preserve all registers except those that absolutely must be | |||
3567 | // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be | |||
3568 | // silly). | |||
3569 | auto TRI = | |||
3570 | getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); | |||
3571 | auto ARI = static_cast<const ARMRegisterInfo *>(TRI); | |||
3572 | const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); | |||
3573 | ||||
3574 | // Finally, we can make the call. This is just a degenerate version of a | |||
3575 | // normal ARM call node: r0 takes the address of the descriptor, and | |||
3576 | // returns the address of the variable in this thread. | |||
3577 | Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); | |||
3578 | Chain = | |||
3579 | DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), | |||
3580 | Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), | |||
3581 | DAG.getRegisterMask(Mask), Chain.getValue(1)); | |||
3582 | return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); | |||
3583 | } | |||
3584 | ||||
3585 | SDValue | |||
3586 | ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, | |||
3587 | SelectionDAG &DAG) const { | |||
3588 | assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); | |||
3589 | ||||
3590 | SDValue Chain = DAG.getEntryNode(); | |||
3591 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3592 | SDLoc DL(Op); | |||
3593 | ||||
3594 | // Load the current TEB (thread environment block) | |||
3595 | SDValue Ops[] = {Chain, | |||
3596 | DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), | |||
3597 | DAG.getTargetConstant(15, DL, MVT::i32), | |||
3598 | DAG.getTargetConstant(0, DL, MVT::i32), | |||
3599 | DAG.getTargetConstant(13, DL, MVT::i32), | |||
3600 | DAG.getTargetConstant(0, DL, MVT::i32), | |||
3601 | DAG.getTargetConstant(2, DL, MVT::i32)}; | |||
3602 | SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, | |||
3603 | DAG.getVTList(MVT::i32, MVT::Other), Ops); | |||
3604 | ||||
3605 | SDValue TEB = CurrentTEB.getValue(0); | |||
3606 | Chain = CurrentTEB.getValue(1); | |||
3607 | ||||
3608 | // Load the ThreadLocalStoragePointer from the TEB | |||
3609 | // A pointer to the TLS array is located at offset 0x2c from the TEB. | |||
3610 | SDValue TLSArray = | |||
3611 | DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); | |||
3612 | TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); | |||
3613 | ||||
3614 | // The pointer to the thread's TLS data area lives in the TLSArray at the | |||
3615 | // offset TLS index * 4. | |||
3616 | ||||
3617 | // Load the TLS index from the C runtime | |||
3618 | SDValue TLSIndex = | |||
3619 | DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); | |||
3620 | TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); | |||
3621 | TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); | |||
3622 | ||||
3623 | SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, | |||
3624 | DAG.getConstant(2, DL, MVT::i32)); | |||
3625 | SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, | |||
3626 | DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), | |||
3627 | MachinePointerInfo()); | |||
3628 | ||||
3629 | // Get the offset of the start of the .tls section (section base) | |||
3630 | const auto *GA = cast<GlobalAddressSDNode>(Op); | |||
3631 | auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); | |||
3632 | SDValue Offset = DAG.getLoad( | |||
3633 | PtrVT, DL, Chain, | |||
3634 | DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, | |||
3635 | DAG.getTargetConstantPool(CPV, PtrVT, Align(4))), | |||
3636 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3637 | ||||
3638 | return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); | |||
3639 | } | |||
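// [Editorial sketch, not in the original source] The net effect of the
// sequence above, as C-style pseudocode (names are illustrative):
//
//   char **TlsArray = *(char ***)(TEB + 0x2c);
//   char  *TlsData  = TlsArray[_tls_index];   // the SHL by 2 scales by 4
//   return TlsData + SECREL(var);             // section-relative offset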
3640 | ||||
3641 | // Lower ISD::GlobalTLSAddress using the "general dynamic" model | |||
3642 | SDValue | |||
3643 | ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, | |||
3644 | SelectionDAG &DAG) const { | |||
3645 | SDLoc dl(GA); | |||
3646 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3647 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; | |||
3648 | MachineFunction &MF = DAG.getMachineFunction(); | |||
3649 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
3650 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
3651 | ARMConstantPoolValue *CPV = | |||
3652 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, | |||
3653 | ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); | |||
3654 | SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
3655 | Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); | |||
3656 | Argument = DAG.getLoad( | |||
3657 | PtrVT, dl, DAG.getEntryNode(), Argument, | |||
3658 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3659 | SDValue Chain = Argument.getValue(1); | |||
3660 | ||||
3661 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | |||
3662 | Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); | |||
3663 | ||||
3664 | // call __tls_get_addr. | |||
3665 | ArgListTy Args; | |||
3666 | ArgListEntry Entry; | |||
3667 | Entry.Node = Argument; | |||
3668 | Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); | |||
3669 | Args.push_back(Entry); | |||
3670 | ||||
3671 | // FIXME: is there useful debug info available here? | |||
3672 | TargetLowering::CallLoweringInfo CLI(DAG); | |||
3673 | CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( | |||
3674 | CallingConv::C, Type::getInt32Ty(*DAG.getContext()), | |||
3675 | DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); | |||
3676 | ||||
3677 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); | |||
3678 | return CallResult.first; | |||
3679 | } | |||
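// [Editorial sketch, not in the original source] The general-dynamic sequence
// built above corresponds roughly to (ARM mode, PCAdj == 8):
//
//   ldr   r0, .LCPI0_0      ; .LCPI0_0: .long var(TLSGD) - (.LPC0_0 + 8)
// .LPC0_0:
//   add   r0, pc, r0        ; r0 = address of the TLS argument structure
//   bl    __tls_get_addr    ; variable's address returned in r0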
3680 | ||||
3681 | // Lower ISD::GlobalTLSAddress using the "initial exec" or | |||
3682 | // "local exec" model. | |||
3683 | SDValue | |||
3684 | ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, | |||
3685 | SelectionDAG &DAG, | |||
3686 | TLSModel::Model model) const { | |||
3687 | const GlobalValue *GV = GA->getGlobal(); | |||
3688 | SDLoc dl(GA); | |||
3689 | SDValue Offset; | |||
3690 | SDValue Chain = DAG.getEntryNode(); | |||
3691 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3692 | // Get the Thread Pointer | |||
3693 | SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); | |||
3694 | ||||
3695 | if (model == TLSModel::InitialExec) { | |||
3696 | MachineFunction &MF = DAG.getMachineFunction(); | |||
3697 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
3698 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
3699 | // Initial exec model. | |||
3700 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; | |||
3701 | ARMConstantPoolValue *CPV = | |||
3702 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, | |||
3703 | ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, | |||
3704 | true); | |||
3705 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
3706 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); | |||
3707 | Offset = DAG.getLoad( | |||
3708 | PtrVT, dl, Chain, Offset, | |||
3709 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3710 | Chain = Offset.getValue(1); | |||
3711 | ||||
3712 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | |||
3713 | Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); | |||
3714 | ||||
3715 | Offset = DAG.getLoad( | |||
3716 | PtrVT, dl, Chain, Offset, | |||
3717 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3718 | } else { | |||
3719 | // local exec model | |||
3720 | assert(model == TLSModel::LocalExec); | |||
3721 | ARMConstantPoolValue *CPV = | |||
3722 | ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); | |||
3723 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
3724 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); | |||
3725 | Offset = DAG.getLoad( | |||
3726 | PtrVT, dl, Chain, Offset, | |||
3727 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3728 | } | |||
3729 | ||||
3730 | // The address of the thread local variable is the add of the thread | |||
3731 | // pointer with the offset of the variable. | |||
3732 | return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); | |||
3733 | } | |||
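// [Editorial summary, not in the original source] As pseudocode, the two
// models above reduce to:
//
//   initial exec: Offset = load(PIC_ADD(load(CP var(GOTTPOFF)), pc))
//                 (one extra load, through the GOT)
//   local exec:   Offset = load(CP var(TPOFF))
//   address       = THREAD_POINTER + Offset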
3734 | ||||
3735 | SDValue | |||
3736 | ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { | |||
3737 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); | |||
3738 | if (DAG.getTarget().useEmulatedTLS()) | |||
3739 | return LowerToTLSEmulatedModel(GA, DAG); | |||
3740 | ||||
3741 | if (Subtarget->isTargetDarwin()) | |||
3742 | return LowerGlobalTLSAddressDarwin(Op, DAG); | |||
3743 | ||||
3744 | if (Subtarget->isTargetWindows()) | |||
3745 | return LowerGlobalTLSAddressWindows(Op, DAG); | |||
3746 | ||||
3747 | // TODO: implement the "local dynamic" model | |||
3748 | assert(Subtarget->isTargetELF() && "Only ELF implemented here"); | |||
3749 | TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); | |||
3750 | ||||
3751 | switch (model) { | |||
3752 | case TLSModel::GeneralDynamic: | |||
3753 | case TLSModel::LocalDynamic: | |||
3754 | return LowerToTLSGeneralDynamicModel(GA, DAG); | |||
3755 | case TLSModel::InitialExec: | |||
3756 | case TLSModel::LocalExec: | |||
3757 | return LowerToTLSExecModels(GA, DAG, model); | |||
3758 | } | |||
3759 | llvm_unreachable("bogus TLS model")::llvm::llvm_unreachable_internal("bogus TLS model", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 3759); | |||
3760 | } | |||
3761 | ||||
3762 | /// Return true if all users of V are within function F, looking through | |||
3763 | /// ConstantExprs. | |||
3764 | static bool allUsersAreInFunction(const Value *V, const Function *F) { | |||
3765 | SmallVector<const User*,4> Worklist(V->users()); | |||
3766 | while (!Worklist.empty()) { | |||
3767 | auto *U = Worklist.pop_back_val(); | |||
3768 | if (isa<ConstantExpr>(U)) { | |||
3769 | append_range(Worklist, U->users()); | |||
3770 | continue; | |||
3771 | } | |||
3772 | ||||
3773 | auto *I = dyn_cast<Instruction>(U); | |||
3774 | if (!I || I->getParent()->getParent() != F) | |||
3775 | return false; | |||
3776 | } | |||
3777 | return true; | |||
3778 | } | |||
3779 | ||||
3780 | static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, | |||
3781 | const GlobalValue *GV, SelectionDAG &DAG, | |||
3782 | EVT PtrVT, const SDLoc &dl) { | |||
3783 | // If we're creating a pool entry for a constant global with unnamed address, | |||
3784 | // and the global is small enough, we can emit it inline into the constant pool | |||
3785 | // to save ourselves an indirection. | |||
3786 | // | |||
3787 | // This is a win if the constant is only used in one function (so it doesn't | |||
3788 | // need to be duplicated) or duplicating the constant wouldn't increase code | |||
3789 | // size (implying the constant is no larger than 4 bytes). | |||
3790 | const Function &F = DAG.getMachineFunction().getFunction(); | |||
3791 | ||||
3792 | // We rely on this decision to inline being idempotent and unrelated to the | |||
3793 | // use-site. We know that if we inline a variable at one use site, we'll | |||
3794 | // inline it elsewhere too (and reuse the constant pool entry). Fast-isel | |||
3795 | // doesn't know about this optimization, so bail out if it's enabled; else | |||
3796 | // we could decide to inline here (and thus never emit the GV) while still | |||
3797 | // requiring the GV from fast-isel generated code. | |||
3798 | if (!EnableConstpoolPromotion || | |||
3799 | DAG.getMachineFunction().getTarget().Options.EnableFastISel) | |||
3800 | return SDValue(); | |||
3801 | ||||
3802 | auto *GVar = dyn_cast<GlobalVariable>(GV); | |||
3803 | if (!GVar || !GVar->hasInitializer() || | |||
3804 | !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || | |||
3805 | !GVar->hasLocalLinkage()) | |||
3806 | return SDValue(); | |||
3807 | ||||
3808 | // If we inline a value that contains relocations, we move the relocations | |||
3809 | // from .data to .text. This is not allowed in position-independent code. | |||
3810 | auto *Init = GVar->getInitializer(); | |||
3811 | if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) && | |||
3812 | Init->needsDynamicRelocation()) | |||
3813 | return SDValue(); | |||
3814 | ||||
3815 | // The constant islands pass can only really deal with alignment requests | |||
3816 | // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote | |||
3817 | // any type wanting greater alignment requirements than 4 bytes. We also | |||
3818 | // can only promote constants that are multiples of 4 bytes in size or | |||
3819 | // are paddable to a multiple of 4. Currently we only try to pad constants | |||
3820 | // that are strings, for simplicity. | |||
3821 | auto *CDAInit = dyn_cast<ConstantDataArray>(Init); | |||
3822 | unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); | |||
3823 | Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar); | |||
3824 | unsigned RequiredPadding = 4 - (Size % 4); | |||
3825 | bool PaddingPossible = | |||
3826 | RequiredPadding == 4 || (CDAInit && CDAInit->isString()); | |||
3827 | if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize || | |||
3828 | Size == 0) | |||
3829 | return SDValue(); | |||
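// [Editorial worked example, not in the original source] A 6-byte string
// initializer gives RequiredPadding = 4 - (6 % 4) = 2, so it is padded with
// two NULs to a PaddedSize of 8; an initializer whose size is already a
// multiple of 4 (say Size == 8) gives RequiredPadding == 4, which is treated
// as "no padding needed".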
3830 | ||||
3831 | unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); | |||
3832 | MachineFunction &MF = DAG.getMachineFunction(); | |||
3833 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
3834 | ||||
3835 | // We can't bloat the constant pool too much, else the ConstantIslands pass | |||
3836 | // may fail to converge. If we haven't promoted this global yet (it may have | |||
3837 | // multiple uses), and promoting it would increase the constant pool size | |||
3838 | // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal. | |||
3839 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) | |||
3840 | if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= | |||
3841 | ConstpoolPromotionMaxTotal) | |||
3842 | return SDValue(); | |||
3843 | ||||
3844 | // This is only valid if all users are in a single function; we can't clone | |||
3845 | // the constant in general. The LLVM IR unnamed_addr allows merging | |||
3846 | // constants, but not cloning them. | |||
3847 | // | |||
3848 | // We could potentially allow cloning if we could prove all uses of the | |||
3849 | // constant in the current function don't care about the address, like | |||
3850 | // printf format strings. But that isn't implemented for now. | |||
3851 | if (!allUsersAreInFunction(GVar, &F)) | |||
3852 | return SDValue(); | |||
3853 | ||||
3854 | // We're going to inline this global. Pad it out if needed. | |||
3855 | if (RequiredPadding != 4) { | |||
3856 | StringRef S = CDAInit->getAsString(); | |||
3857 | ||||
3858 | SmallVector<uint8_t,16> V(S.size()); | |||
3859 | std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); | |||
3860 | while (RequiredPadding--) | |||
3861 | V.push_back(0); | |||
3862 | Init = ConstantDataArray::get(*DAG.getContext(), V); | |||
3863 | } | |||
3864 | ||||
3865 | auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); | |||
3866 | SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4)); | |||
3867 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { | |||
3868 | AFI->markGlobalAsPromotedToConstantPool(GVar); | |||
3869 | AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + | |||
3870 | PaddedSize - 4); | |||
3871 | } | |||
3872 | ++NumConstpoolPromoted; | |||
3873 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | |||
3874 | } | |||
3875 | ||||
3876 | bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { | |||
3877 | if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) | |||
3878 | if (!(GV = GA->getAliaseeObject())) | |||
3879 | return false; | |||
3880 | if (const auto *V = dyn_cast<GlobalVariable>(GV)) | |||
3881 | return V->isConstant(); | |||
3882 | return isa<Function>(GV); | |||
3883 | } | |||
3884 | ||||
3885 | SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, | |||
3886 | SelectionDAG &DAG) const { | |||
3887 | switch (Subtarget->getTargetTriple().getObjectFormat()) { | |||
3888 | default: llvm_unreachable("unknown object format"); | |||
3889 | case Triple::COFF: | |||
3890 | return LowerGlobalAddressWindows(Op, DAG); | |||
3891 | case Triple::ELF: | |||
3892 | return LowerGlobalAddressELF(Op, DAG); | |||
3893 | case Triple::MachO: | |||
3894 | return LowerGlobalAddressDarwin(Op, DAG); | |||
3895 | } | |||
3896 | } | |||
3897 | ||||
3898 | SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, | |||
3899 | SelectionDAG &DAG) const { | |||
3900 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3901 | SDLoc dl(Op); | |||
3902 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
3903 | const TargetMachine &TM = getTargetMachine(); | |||
3904 | bool IsRO = isReadOnly(GV); | |||
3905 | ||||
3906 | // Call promoteToConstantPool only if not generating an XO text section. | |||
3907 | if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) | |||
3908 | if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl)) | |||
3909 | return V; | |||
3910 | ||||
3911 | if (isPositionIndependent()) { | |||
3912 | bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); | |||
3913 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, | |||
3914 | UseGOT_PREL ? ARMII::MO_GOT : 0); | |||
3915 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); | |||
3916 | if (UseGOT_PREL) | |||
3917 | Result = | |||
3918 | DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, | |||
3919 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | |||
3920 | return Result; | |||
3921 | } else if (Subtarget->isROPI() && IsRO) { | |||
3922 | // PC-relative. | |||
3923 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); | |||
3924 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); | |||
3925 | return Result; | |||
3926 | } else if (Subtarget->isRWPI() && !IsRO) { | |||
3927 | // SB-relative. | |||
3928 | SDValue RelAddr; | |||
3929 | if (Subtarget->useMovt()) { | |||
3930 | ++NumMovwMovt; | |||
3931 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); | |||
3932 | RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); | |||
3933 | } else { // use literal pool for address constant | |||
3934 | ARMConstantPoolValue *CPV = | |||
3935 | ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); | |||
3936 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
3937 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | |||
3938 | RelAddr = DAG.getLoad( | |||
3939 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | |||
3940 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3941 | } | |||
3942 | SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); | |||
3943 | SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); | |||
3944 | return Result; | |||
3945 | } | |||
3946 | ||||
3947 | // If we have T2 ops, we can materialize the address directly via movt/movw | |||
3948 | // pair. This is always cheaper. | |||
3949 | if (Subtarget->useMovt()) { | |||
3950 | ++NumMovwMovt; | |||
3951 | // FIXME: Once remat is capable of dealing with instructions with register | |||
3952 | // operands, expand this into two nodes. | |||
3953 | return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, | |||
3954 | DAG.getTargetGlobalAddress(GV, dl, PtrVT)); | |||
3955 | } else { | |||
3956 | SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); | |||
3957 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | |||
3958 | return DAG.getLoad( | |||
3959 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | |||
3960 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
3961 | } | |||
3962 | } | |||
3963 | ||||
3964 | SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, | |||
3965 | SelectionDAG &DAG) const { | |||
3966 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && | |||
3967 | "ROPI/RWPI not currently supported for Darwin"); | |||
3968 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
3969 | SDLoc dl(Op); | |||
3970 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
3971 | ||||
3972 | if (Subtarget->useMovt()) | |||
3973 | ++NumMovwMovt; | |||
3974 | ||||
3975 | // FIXME: Once remat is capable of dealing with instructions with register | |||
3976 | // operands, expand this into multiple nodes | |||
3977 | unsigned Wrapper = | |||
3978 | isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper; | |||
3979 | ||||
3980 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); | |||
3981 | SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); | |||
3982 | ||||
3983 | if (Subtarget->isGVIndirectSymbol(GV)) | |||
3984 | Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, | |||
3985 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | |||
3986 | return Result; | |||
3987 | } | |||
3988 | ||||
3989 | SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, | |||
3990 | SelectionDAG &DAG) const { | |||
3991 | assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); | |||
3992 | assert(Subtarget->useMovt() && | |||
3993 | "Windows on ARM expects to use movw/movt"); | |||
3994 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && | |||
3995 | "ROPI/RWPI not currently supported for Windows"); | |||
3996 | ||||
3997 | const TargetMachine &TM = getTargetMachine(); | |||
3998 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
3999 | ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG; | |||
4000 | if (GV->hasDLLImportStorageClass()) | |||
4001 | TargetFlags = ARMII::MO_DLLIMPORT; | |||
4002 | else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) | |||
4003 | TargetFlags = ARMII::MO_COFFSTUB; | |||
4004 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4005 | SDValue Result; | |||
4006 | SDLoc DL(Op); | |||
4007 | ||||
4008 | ++NumMovwMovt; | |||
4009 | ||||
4010 | // FIXME: Once remat is capable of dealing with instructions with register | |||
4011 | // operands, expand this into two nodes. | |||
4012 | Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, | |||
4013 | DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0, | |||
4014 | TargetFlags)); | |||
4015 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) | |||
4016 | Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, | |||
4017 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | |||
4018 | return Result; | |||
4019 | } | |||
4020 | ||||
4021 | SDValue | |||
4022 | ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { | |||
4023 | SDLoc dl(Op); | |||
4024 | SDValue Val = DAG.getConstant(0, dl, MVT::i32); | |||
4025 | return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, | |||
4026 | DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), | |||
4027 | Op.getOperand(1), Val); | |||
4028 | } | |||
4029 | ||||
4030 | SDValue | |||
4031 | ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { | |||
4032 | SDLoc dl(Op); | |||
4033 | return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), | |||
4034 | Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); | |||
4035 | } | |||
4036 | ||||
4037 | SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, | |||
4038 | SelectionDAG &DAG) const { | |||
4039 | SDLoc dl(Op); | |||
4040 | return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, | |||
4041 | Op.getOperand(0)); | |||
4042 | } | |||
4043 | ||||
4044 | SDValue ARMTargetLowering::LowerINTRINSIC_VOID( | |||
4045 | SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { | |||
4046 | unsigned IntNo = | |||
4047 | cast<ConstantSDNode>( | |||
4048 | Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other)) | |||
4049 | ->getZExtValue(); | |||
4050 | switch (IntNo) { | |||
4051 | default: | |||
4052 | return SDValue(); // Don't custom lower most intrinsics. | |||
4053 | case Intrinsic::arm_gnu_eabi_mcount: { | |||
4054 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4055 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4056 | SDLoc dl(Op); | |||
4057 | SDValue Chain = Op.getOperand(0); | |||
4058 | // call "\01__gnu_mcount_nc" | |||
4059 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); | |||
4060 | const uint32_t *Mask = | |||
4061 | ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); | |||
4062 | assert(Mask && "Missing call preserved mask for calling convention"); | |||
4063 | // Mark LR an implicit live-in. | |||
4064 | Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); | |||
4065 | SDValue ReturnAddress = | |||
4066 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT); | |||
4067 | constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue}; | |||
4068 | SDValue Callee = | |||
4069 | DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0); | |||
4070 | SDValue RegisterMask = DAG.getRegisterMask(Mask); | |||
4071 | if (Subtarget->isThumb()) | |||
4072 | return SDValue( | |||
4073 | DAG.getMachineNode( | |||
4074 | ARM::tBL_PUSHLR, dl, ResultTys, | |||
4075 | {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT), | |||
4076 | DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}), | |||
4077 | 0); | |||
4078 | return SDValue( | |||
4079 | DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys, | |||
4080 | {ReturnAddress, Callee, RegisterMask, Chain}), | |||
4081 | 0); | |||
4082 | } | |||
4083 | } | |||
4084 | } | |||
4085 | ||||
4086 | SDValue | |||
4087 | ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, | |||
4088 | const ARMSubtarget *Subtarget) const { | |||
4089 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | |||
4090 | SDLoc dl(Op); | |||
4091 | switch (IntNo) { | |||
4092 | default: return SDValue(); // Don't custom lower most intrinsics. | |||
4093 | case Intrinsic::thread_pointer: { | |||
4094 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4095 | return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); | |||
4096 | } | |||
4097 | case Intrinsic::arm_cls: { | |||
4098 | const SDValue &Operand = Op.getOperand(1); | |||
4099 | const EVT VTy = Op.getValueType(); | |||
4100 | SDValue SRA = | |||
4101 | DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); | |||
4102 | SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); | |||
4103 | SDValue SHL = | |||
4104 | DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); | |||
4105 | SDValue OR = | |||
4106 | DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); | |||
4107 | SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); | |||
4108 | return Result; | |||
4109 | } | |||
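// [Editorial note, not in the original source] The expansion above uses the
// identity cls(x) == ctlz((((x >>s 31) ^ x) << 1) | 1), where >>s is an
// arithmetic shift. For example, with 32-bit x:
//   x = 0xFFFFFFFF: ((x>>s31)^x) = 0, (0<<1)|1 = 1,          ctlz = 31 = cls
//   x = 0x70000000: ((x>>s31)^x) = x, (x<<1)|1 = 0xE0000001, ctlz = 0  = cls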
4110 | case Intrinsic::arm_cls64: { | |||
4111 | // cls(x) = if cls(hi(x)) != 31 then cls(hi(x)) | |||
4112 | // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x))) | |||
4113 | const SDValue &Operand = Op.getOperand(1); | |||
4114 | const EVT VTy = Op.getValueType(); | |||
4115 | ||||
4116 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, | |||
4117 | DAG.getConstant(1, dl, VTy)); | |||
4118 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, | |||
4119 | DAG.getConstant(0, dl, VTy)); | |||
4120 | SDValue Constant0 = DAG.getConstant(0, dl, VTy); | |||
4121 | SDValue Constant1 = DAG.getConstant(1, dl, VTy); | |||
4122 | SDValue Constant31 = DAG.getConstant(31, dl, VTy); | |||
4123 | SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31); | |||
4124 | SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi); | |||
4125 | SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1); | |||
4126 | SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1); | |||
4127 | SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi); | |||
4128 | SDValue CheckLo = | |||
4129 | DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ); | |||
4130 | SDValue HiIsZero = | |||
4131 | DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ); | |||
4132 | SDValue AdjustedLo = | |||
4133 | DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy)); | |||
4134 | SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo); | |||
4135 | SDValue Result = | |||
4136 | DAG.getSelect(dl, VTy, CheckLo, | |||
4137 | DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi); | |||
4138 | return Result; | |||
4139 | } | |||
4140 | case Intrinsic::eh_sjlj_lsda: { | |||
4141 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4142 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
4143 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | |||
4144 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4145 | SDValue CPAddr; | |||
4146 | bool IsPositionIndependent = isPositionIndependent(); | |||
4147 | unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; | |||
4148 | ARMConstantPoolValue *CPV = | |||
4149 | ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, | |||
4150 | ARMCP::CPLSDA, PCAdj); | |||
4151 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | |||
4152 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | |||
4153 | SDValue Result = DAG.getLoad( | |||
4154 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | |||
4155 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | |||
4156 | ||||
4157 | if (IsPositionIndependent) { | |||
4158 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | |||
4159 | Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); | |||
4160 | } | |||
4161 | return Result; | |||
4162 | } | |||
4163 | case Intrinsic::arm_neon_vabs: | |||
4164 | return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), | |||
4165 | Op.getOperand(1)); | |||
4166 | case Intrinsic::arm_neon_vmulls: | |||
4167 | case Intrinsic::arm_neon_vmullu: { | |||
4168 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) | |||
4169 | ? ARMISD::VMULLs : ARMISD::VMULLu; | |||
4170 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | |||
4171 | Op.getOperand(1), Op.getOperand(2)); | |||
4172 | } | |||
4173 | case Intrinsic::arm_neon_vminnm: | |||
4174 | case Intrinsic::arm_neon_vmaxnm: { | |||
4175 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) | |||
4176 | ? ISD::FMINNUM : ISD::FMAXNUM; | |||
4177 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | |||
4178 | Op.getOperand(1), Op.getOperand(2)); | |||
4179 | } | |||
4180 | case Intrinsic::arm_neon_vminu: | |||
4181 | case Intrinsic::arm_neon_vmaxu: { | |||
4182 | if (Op.getValueType().isFloatingPoint()) | |||
4183 | return SDValue(); | |||
4184 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) | |||
4185 | ? ISD::UMIN : ISD::UMAX; | |||
4186 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | |||
4187 | Op.getOperand(1), Op.getOperand(2)); | |||
4188 | } | |||
4189 | case Intrinsic::arm_neon_vmins: | |||
4190 | case Intrinsic::arm_neon_vmaxs: { | |||
4191 | // v{min,max}s is overloaded between signed integers and floats. | |||
4192 | if (!Op.getValueType().isFloatingPoint()) { | |||
4193 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | |||
4194 | ? ISD::SMIN : ISD::SMAX; | |||
4195 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | |||
4196 | Op.getOperand(1), Op.getOperand(2)); | |||
4197 | } | |||
4198 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | |||
4199 | ? ISD::FMINIMUM : ISD::FMAXIMUM; | |||
4200 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | |||
4201 | Op.getOperand(1), Op.getOperand(2)); | |||
4202 | } | |||
4203 | case Intrinsic::arm_neon_vtbl1: | |||
4204 | return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), | |||
4205 | Op.getOperand(1), Op.getOperand(2)); | |||
4206 | case Intrinsic::arm_neon_vtbl2: | |||
4207 | return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), | |||
4208 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | |||
4209 | case Intrinsic::arm_mve_pred_i2v: | |||
4210 | case Intrinsic::arm_mve_pred_v2i: | |||
4211 | return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(), | |||
4212 | Op.getOperand(1)); | |||
4213 | case Intrinsic::arm_mve_vreinterpretq: | |||
4214 | return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(), | |||
4215 | Op.getOperand(1)); | |||
4216 | case Intrinsic::arm_mve_lsll: | |||
4217 | return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(), | |||
4218 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | |||
4219 | case Intrinsic::arm_mve_asrl: | |||
4220 | return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(), | |||
4221 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | |||
4222 | } | |||
4223 | } | |||
4224 | ||||
4225 | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, | |||
4226 | const ARMSubtarget *Subtarget) { | |||
4227 | SDLoc dl(Op); | |||
4228 | ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); | |||
4229 | auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); | |||
4230 | if (SSID == SyncScope::SingleThread) | |||
4231 | return Op; | |||
4232 | ||||
4233 | if (!Subtarget->hasDataBarrier()) { | |||
4234 | // Some ARMv6 CPUs can support data barriers with an mcr instruction. | |||
4235 | // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get | |||
4236 | // here. | |||
4237 | assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && | |||
4238 | "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); | |||
4239 | return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), | |||
4240 | DAG.getConstant(0, dl, MVT::i32)); | |||
4241 | } | |||
4242 | ||||
4243 | ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); | |||
4244 | AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); | |||
4245 | ARM_MB::MemBOpt Domain = ARM_MB::ISH; | |||
4246 | if (Subtarget->isMClass()) { | |||
4247 | // Only a full system barrier exists in the M-class architectures. | |||
4248 | Domain = ARM_MB::SY; | |||
4249 | } else if (Subtarget->preferISHSTBarriers() && | |||
4250 | Ord == AtomicOrdering::Release) { | |||
4251 | // Swift happens to implement ISHST barriers in a way that's compatible with | |||
4252 | // Release semantics but weaker than ISH so we'd be fools not to use | |||
4253 | // it. Beware: other processors probably don't! | |||
4254 | Domain = ARM_MB::ISHST; | |||
4255 | } | |||
4256 | ||||
4257 | return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), | |||
4258 | DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), | |||
4259 | DAG.getConstant(Domain, dl, MVT::i32)); | |||
4260 | } | |||
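// [Editorial summary, not in the original source] The cases above map
// roughly to:
//   fence syncscope("singlethread") -> node returned unchanged (no dmb)
//   fence on M-class                -> dmb sy    (only full-system barrier)
//   fence release on Swift-like CPU -> dmb ishst (store-only barrier)
//   anything else with DMB support  -> dmb ish
//   no DMB, ARMv6 ARM mode          -> MEMBARRIER_MCR (mcr p15, ...)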
4261 | ||||
4262 | static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, | |||
4263 | const ARMSubtarget *Subtarget) { | |||
4264 | // ARM before v5TE and Thumb1 do not have preload instructions. | |||
4265 | if (!(Subtarget->isThumb2() || | |||
4266 | (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) | |||
4267 | // Just preserve the chain. | |||
4268 | return Op.getOperand(0); | |||
4269 | ||||
4270 | SDLoc dl(Op); | |||
4271 | unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; | |||
4272 | if (!isRead && | |||
4273 | (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) | |||
4274 | // ARMv7 with MP extension has PLDW. | |||
4275 | return Op.getOperand(0); | |||
4276 | ||||
4277 | unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); | |||
4278 | if (Subtarget->isThumb()) { | |||
4279 | // Invert the bits. | |||
4280 | isRead = ~isRead & 1; | |||
4281 | isData = ~isData & 1; | |||
4282 | } | |||
4283 | ||||
4284 | return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), | |||
4285 | Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), | |||
4286 | DAG.getConstant(isData, dl, MVT::i32)); | |||
4287 | } | |||
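// [Editorial example, not in the original source] For the IR intrinsic
// llvm.prefetch(p, /*rw=*/0, locality, /*cachetype=*/1), isRead = ~0 & 1 = 1
// and a PLD is emitted; with rw == 1, a write prefetch (PLDW) is only kept on
// targets with v7 + the MP extension, otherwise the node folds to its chain.
// In Thumb mode both bits are inverted again to match the Thumb encoding.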
4288 | ||||
4289 | static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { | |||
4290 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4291 | ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); | |||
4292 | ||||
4293 | // vastart just stores the address of the VarArgsFrameIndex slot into the | |||
4294 | // memory location argument. | |||
4295 | SDLoc dl(Op); | |||
4296 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); | |||
4297 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); | |||
4298 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); | |||
4299 | return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), | |||
4300 | MachinePointerInfo(SV)); | |||
4301 | } | |||
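     | // E.g. for "void f(int n, ...)" this emits a single store of the | |||
     | // VarArgsFrameIndex address through the va_list pointer operand; the | |||
     | // slots themselves are filled in by VarArgStyleRegisters below. | |||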
4302 | ||||
4303 | SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, | |||
4304 | CCValAssign &NextVA, | |||
4305 | SDValue &Root, | |||
4306 | SelectionDAG &DAG, | |||
4307 | const SDLoc &dl) const { | |||
4308 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4309 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
4310 | ||||
4311 | const TargetRegisterClass *RC; | |||
4312 | if (AFI->isThumb1OnlyFunction()) | |||
4313 | RC = &ARM::tGPRRegClass; | |||
4314 | else | |||
4315 | RC = &ARM::GPRRegClass; | |||
4316 | ||||
4317 | // Transform the arguments stored in physical registers into virtual ones. | |||
4318 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); | |||
4319 | SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | |||
4320 | ||||
4321 | SDValue ArgValue2; | |||
4322 | if (NextVA.isMemLoc()) { | |||
4323 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
4324 | int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); | |||
4325 | ||||
4326 | // Create load node to retrieve arguments from the stack. | |||
4327 | SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); | |||
4328 | ArgValue2 = DAG.getLoad( | |||
4329 | MVT::i32, dl, Root, FIN, | |||
4330 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | |||
4331 | } else { | |||
4332 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); | |||
4333 | ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | |||
4334 | } | |||
4335 | if (!Subtarget->isLittle()) | |||
4336 | std::swap (ArgValue, ArgValue2); | |||
4337 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); | |||
4338 | } | |||
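     | // Sketch of the common case (illustrative): an f64 split across r0/r1 | |||
     | // is reassembled as | |||
     | //   t0 = CopyFromReg r0;  t1 = CopyFromReg r1 (or a stack load) | |||
     | //   f64 = VMOVDRR t0, t1          ; roughly "vmov d0, r0, r1" | |||
     | // with the two halves swapped first on big-endian subtargets. | |||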
4339 | ||||
4340 | // The remaining GPRs hold either the beginning of variable-argument | |||
4341 | // data, or the beginning of an aggregate passed by value (usually | |||
4342 | // byval). Either way, we allocate stack slots adjacent to the data | |||
4343 | // provided by our caller, and store the unallocated registers there. | |||
4344 | // If this is a variadic function, the va_list pointer will begin with | |||
4345 | // these values; otherwise, this reassembles a (byval) structure that | |||
4346 | // was split between registers and memory. | |||
4347 | // Returns the frame index that the registers were stored into. | |||
4348 | int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, | |||
4349 | const SDLoc &dl, SDValue &Chain, | |||
4350 | const Value *OrigArg, | |||
4351 | unsigned InRegsParamRecordIdx, | |||
4352 | int ArgOffset, unsigned ArgSize) const { | |||
4353 |   // Currently, two use cases are possible: | |||
4354 |   // Case #1. Non-vararg function, and we meet the first byval parameter. | |||
4355 |   //          Set up the first unallocated register as the first byval | |||
4356 |   //          register; consume all remaining registers | |||
4357 |   //          (these two actions are performed by the HandleByVal method). | |||
4358 |   //          Then, here, we initialize the stack frame with | |||
4359 |   //          "store-reg" instructions. | |||
4360 |   // Case #2. Vararg function that doesn't contain byval parameters. | |||
4361 |   //          The same: consume all remaining unallocated registers and | |||
4362 |   //          initialize the stack frame. | |||
4363 | ||||
4364 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4365 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
4366 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
4367 | unsigned RBegin, REnd; | |||
4368 | if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { | |||
4369 | CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); | |||
4370 | } else { | |||
4371 | unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); | |||
4372 | RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; | |||
4373 | REnd = ARM::R4; | |||
4374 | } | |||
4375 | ||||
4376 | if (REnd != RBegin) | |||
4377 | ArgOffset = -4 * (ARM::R4 - RBegin); | |||
4378 | ||||
4379 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4380 | int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); | |||
4381 | SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); | |||
4382 | ||||
4383 | SmallVector<SDValue, 4> MemOps; | |||
4384 | const TargetRegisterClass *RC = | |||
4385 | AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; | |||
4386 | ||||
4387 | for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { | |||
4388 | Register VReg = MF.addLiveIn(Reg, RC); | |||
4389 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); | |||
4390 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, | |||
4391 | MachinePointerInfo(OrigArg, 4 * i)); | |||
4392 | MemOps.push_back(Store); | |||
4393 | FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); | |||
4394 | } | |||
4395 | ||||
4396 | if (!MemOps.empty()) | |||
4397 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); | |||
4398 | return FrameIndex; | |||
4399 | } | |||
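     | // Worked example (illustrative): a 12-byte byval argument whose first | |||
     | // 8 bytes were assigned to r2,r3 gives RBegin = r2 and REnd = r4, so | |||
     | // ArgOffset becomes -4 * (r4 - r2) = -8. Storing r2 at offset -8 and r3 | |||
     | // at -4 places them directly below the 4 bytes the caller already put | |||
     | // on the stack, recreating one contiguous 12-byte aggregate. | |||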
4400 | ||||
4401 | // Set up the stack frame that the va_list pointer will start from. | |||
4402 | void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, | |||
4403 | const SDLoc &dl, SDValue &Chain, | |||
4404 | unsigned ArgOffset, | |||
4405 | unsigned TotalArgRegsSaveSize, | |||
4406 | bool ForceMutable) const { | |||
4407 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4408 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
4409 | ||||
4410 | // Try to store any remaining integer argument regs | |||
4411 | // to their spots on the stack so that they may be loaded by dereferencing | |||
4412 | // the result of va_next. | |||
4413 |   // If there are no regs to be stored, just point the address after the | |||
4414 |   // last argument passed via the stack. | |||
4415 | int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, | |||
4416 | CCInfo.getInRegsParamsCount(), | |||
4417 | CCInfo.getNextStackOffset(), | |||
4418 | std::max(4U, TotalArgRegsSaveSize)); | |||
4419 | AFI->setVarArgsFrameIndex(FrameIndex); | |||
4420 | } | |||
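     | // E.g. for "int f(int a, ...)": r0 holds 'a', so r1-r3 are unallocated | |||
     | // and get spilled here; va_start will then point at the r1 slot, with | |||
     | // any stack-passed varargs following contiguously after the r3 slot. | |||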
4421 | ||||
4422 | bool ARMTargetLowering::splitValueIntoRegisterParts( | |||
4423 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, | |||
4424 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { | |||
4425 | EVT ValueVT = Val.getValueType(); | |||
4426 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { | |||
4427 | unsigned ValueBits = ValueVT.getSizeInBits(); | |||
4428 | unsigned PartBits = PartVT.getSizeInBits(); | |||
4429 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); | |||
4430 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); | |||
4431 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); | |||
4432 | Parts[0] = Val; | |||
4433 | return true; | |||
4434 | } | |||
4435 | return false; | |||
4436 | } | |||
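     | // Illustrative chain for an f16 value passed in an f32 register under | |||
     | // the hard-float ABI (bf16 is handled identically): | |||
     | //   f16 -> (bitcast i16) -> (any_extend i32) -> (bitcast f32) | |||
     | // leaving the half value in the low 16 bits of the f32 register; | |||
     | // joinRegisterPartsIntoValue below performs the inverse chain. | |||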
4437 | ||||
4438 | SDValue ARMTargetLowering::joinRegisterPartsIntoValue( | |||
4439 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, | |||
4440 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { | |||
4441 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { | |||
4442 | unsigned ValueBits = ValueVT.getSizeInBits(); | |||
4443 | unsigned PartBits = PartVT.getSizeInBits(); | |||
4444 | SDValue Val = Parts[0]; | |||
4445 | ||||
4446 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); | |||
4447 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); | |||
4448 | Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); | |||
4449 | return Val; | |||
4450 | } | |||
4451 | return SDValue(); | |||
4452 | } | |||
4453 | ||||
4454 | SDValue ARMTargetLowering::LowerFormalArguments( | |||
4455 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | |||
4456 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | |||
4457 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { | |||
4458 | MachineFunction &MF = DAG.getMachineFunction(); | |||
4459 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
4460 | ||||
4461 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
4462 | ||||
4463 | // Assign locations to all of the incoming arguments. | |||
4464 | SmallVector<CCValAssign, 16> ArgLocs; | |||
4465 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, | |||
4466 | *DAG.getContext()); | |||
4467 | CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); | |||
4468 | ||||
4469 | SmallVector<SDValue, 16> ArgValues; | |||
4470 | SDValue ArgValue; | |||
4471 | Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); | |||
4472 | unsigned CurArgIdx = 0; | |||
4473 | ||||
4474 | // Initially ArgRegsSaveSize is zero. | |||
4475 |   // Then we increase this value each time we meet a byval parameter. | |||
4476 |   // We also increase this value in the case of a varargs function. | |||
4477 | AFI->setArgRegsSaveSize(0); | |||
4478 | ||||
4479 | // Calculate the amount of stack space that we need to allocate to store | |||
4480 | // byval and variadic arguments that are passed in registers. | |||
4481 | // We need to know this before we allocate the first byval or variadic | |||
4482 | // argument, as they will be allocated a stack slot below the CFA (Canonical | |||
4483 | // Frame Address, the stack pointer at entry to the function). | |||
4484 | unsigned ArgRegBegin = ARM::R4; | |||
4485 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { | |||
4486 | if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) | |||
4487 | break; | |||
4488 | ||||
4489 | CCValAssign &VA = ArgLocs[i]; | |||
4490 | unsigned Index = VA.getValNo(); | |||
4491 | ISD::ArgFlagsTy Flags = Ins[Index].Flags; | |||
4492 | if (!Flags.isByVal()) | |||
4493 | continue; | |||
4494 | ||||
4495 |     assert(VA.isMemLoc() && "unexpected byval pointer in reg"); | |||
4496 | unsigned RBegin, REnd; | |||
4497 | CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); | |||
4498 | ArgRegBegin = std::min(ArgRegBegin, RBegin); | |||
4499 | ||||
4500 | CCInfo.nextInRegsParam(); | |||
4501 | } | |||
4502 | CCInfo.rewindByValRegsInfo(); | |||
4503 | ||||
4504 | int lastInsIndex = -1; | |||
4505 | if (isVarArg && MFI.hasVAStart()) { | |||
4506 | unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); | |||
4507 | if (RegIdx != std::size(GPRArgRegs)) | |||
4508 | ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); | |||
4509 | } | |||
4510 | ||||
4511 | unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); | |||
4512 | AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); | |||
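     | // E.g. if the lowest register holding byval/vararg data is r1, the save | |||
     | // area is 4 * (r4 - r1) = 12 bytes: one spill slot each for r1, r2, r3. | |||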
4513 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
4514 | ||||
4515 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { | |||
4516 | CCValAssign &VA = ArgLocs[i]; | |||
4517 | if (Ins[VA.getValNo()].isOrigArg()) { | |||
4518 | std::advance(CurOrigArg, | |||
4519 | Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); | |||
4520 | CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); | |||
4521 | } | |||
4522 | // Arguments stored in registers. | |||
4523 | if (VA.isRegLoc()) { | |||
4524 | EVT RegVT = VA.getLocVT(); | |||
4525 | ||||
4526 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { | |||
4527 | // f64 and vector types are split up into multiple registers or | |||
4528 | // combinations of registers and stack slots. | |||
4529 | SDValue ArgValue1 = | |||
4530 | GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | |||
4531 | VA = ArgLocs[++i]; // skip ahead to next loc | |||
4532 | SDValue ArgValue2; | |||
4533 | if (VA.isMemLoc()) { | |||
4534 | int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); | |||
4535 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | |||
4536 | ArgValue2 = DAG.getLoad( | |||
4537 | MVT::f64, dl, Chain, FIN, | |||
4538 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | |||
4539 | } else { | |||
4540 | ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | |||
4541 | } | |||
4542 | ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); | |||
4543 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, | |||
4544 | ArgValue1, DAG.getIntPtrConstant(0, dl)); | |||
4545 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, | |||
4546 | ArgValue2, DAG.getIntPtrConstant(1, dl)); | |||
4547 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { | |||
4548 | ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | |||
4549 | } else { | |||
4550 | const TargetRegisterClass *RC; | |||
4551 | ||||
4552 | if (RegVT == MVT::f16 || RegVT == MVT::bf16) | |||
4553 | RC = &ARM::HPRRegClass; | |||
4554 | else if (RegVT == MVT::f32) | |||
4555 | RC = &ARM::SPRRegClass; | |||
4556 | else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 || | |||
4557 | RegVT == MVT::v4bf16) | |||
4558 | RC = &ARM::DPRRegClass; | |||
4559 | else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 || | |||
4560 | RegVT == MVT::v8bf16) | |||
4561 | RC = &ARM::QPRRegClass; | |||
4562 | else if (RegVT == MVT::i32) | |||
4563 | RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass | |||
4564 | : &ARM::GPRRegClass; | |||
4565 | else | |||
4566 |           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); | |||
4567 | ||||
4568 | // Transform the arguments in physical registers into virtual ones. | |||
4569 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); | |||
4570 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); | |||
4571 | ||||
4572 | // If this value is passed in r0 and has the returned attribute (e.g. | |||
4573 | // C++ 'structors), record this fact for later use. | |||
4574 | if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) { | |||
4575 | AFI->setPreservesR0(); | |||
4576 | } | |||
4577 | } | |||
4578 | ||||
4579 | // If this is an 8 or 16-bit value, it is really passed promoted | |||
4580 | // to 32 bits. Insert an assert[sz]ext to capture this, then | |||
4581 | // truncate to the right size. | |||
4582 | switch (VA.getLocInfo()) { | |||
4583 |       default: llvm_unreachable("Unknown loc info!"); | |||
4584 | case CCValAssign::Full: break; | |||
4585 | case CCValAssign::BCvt: | |||
4586 | ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); | |||
4587 | break; | |||
4588 | case CCValAssign::SExt: | |||
4589 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, | |||
4590 | DAG.getValueType(VA.getValVT())); | |||
4591 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); | |||
4592 | break; | |||
4593 | case CCValAssign::ZExt: | |||
4594 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, | |||
4595 | DAG.getValueType(VA.getValVT())); | |||
4596 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); | |||
4597 | break; | |||
4598 | } | |||
4599 | ||||
4600 | // f16 arguments have their size extended to 4 bytes and passed as if they | |||
4601 | // had been copied to the LSBs of a 32-bit register. | |||
4602 |       // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI). | |||
4603 | if (VA.needsCustom() && | |||
4604 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) | |||
4605 | ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue); | |||
4606 | ||||
4607 | InVals.push_back(ArgValue); | |||
4608 | } else { // VA.isRegLoc() | |||
4609 | // Only arguments passed on the stack should make it here. | |||
4610 |       assert(VA.isMemLoc()); | |||
4611 |       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); | |||
4612 | ||||
4613 | int index = VA.getValNo(); | |||
4614 | ||||
4615 | // Some Ins[] entries become multiple ArgLoc[] entries. | |||
4616 | // Process them only once. | |||
4617 | if (index != lastInsIndex) | |||
4618 | { | |||
4619 | ISD::ArgFlagsTy Flags = Ins[index].Flags; | |||
4620 | // FIXME: For now, all byval parameter objects are marked mutable. | |||
4621 | // This can be changed with more analysis. | |||
4622 |           // In case of tail call optimization, mark all arguments mutable, | |||
4623 |           // since they could be overwritten by the lowering of arguments in | |||
4624 |           // case of a tail call. | |||
4625 | if (Flags.isByVal()) { | |||
4626 |           assert(Ins[index].isOrigArg() && | |||
4627 |                  "Byval arguments cannot be implicit"); | |||
4628 | unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); | |||
4629 | ||||
4630 | int FrameIndex = StoreByValRegs( | |||
4631 | CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, | |||
4632 | VA.getLocMemOffset(), Flags.getByValSize()); | |||
4633 | InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); | |||
4634 | CCInfo.nextInRegsParam(); | |||
4635 | } else { | |||
4636 | unsigned FIOffset = VA.getLocMemOffset(); | |||
4637 | int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, | |||
4638 | FIOffset, true); | |||
4639 | ||||
4640 | // Create load nodes to retrieve arguments from the stack. | |||
4641 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | |||
4642 | InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, | |||
4643 | MachinePointerInfo::getFixedStack( | |||
4644 | DAG.getMachineFunction(), FI))); | |||
4645 | } | |||
4646 | lastInsIndex = index; | |||
4647 | } | |||
4648 | } | |||
4649 | } | |||
4650 | ||||
4651 | // varargs | |||
4652 | if (isVarArg && MFI.hasVAStart()) { | |||
4653 | VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), | |||
4654 | TotalArgRegsSaveSize); | |||
4655 | if (AFI->isCmseNSEntryFunction()) { | |||
4656 | DiagnosticInfoUnsupported Diag( | |||
4657 | DAG.getMachineFunction().getFunction(), | |||
4658 | "secure entry function must not be variadic", dl.getDebugLoc()); | |||
4659 | DAG.getContext()->diagnose(Diag); | |||
4660 | } | |||
4661 | } | |||
4662 | ||||
4663 | unsigned StackArgSize = CCInfo.getNextStackOffset(); | |||
4664 | bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; | |||
4665 | if (canGuaranteeTCO(CallConv, TailCallOpt)) { | |||
4666 | // The only way to guarantee a tail call is if the callee restores its | |||
4667 | // argument area, but it must also keep the stack aligned when doing so. | |||
4668 | const DataLayout &DL = DAG.getDataLayout(); | |||
4669 | StackArgSize = alignTo(StackArgSize, DL.getStackAlignment()); | |||
4670 | ||||
4671 | AFI->setArgumentStackToRestore(StackArgSize); | |||
4672 | } | |||
4673 | AFI->setArgumentStackSize(StackArgSize); | |||
4674 | ||||
4675 | if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) { | |||
4676 | DiagnosticInfoUnsupported Diag( | |||
4677 | DAG.getMachineFunction().getFunction(), | |||
4678 | "secure entry function requires arguments on stack", dl.getDebugLoc()); | |||
4679 | DAG.getContext()->diagnose(Diag); | |||
4680 | } | |||
4681 | ||||
4682 | return Chain; | |||
4683 | } | |||
4684 | ||||
4685 | /// isFloatingPointZero - Return true if this is +0.0. | |||
4686 | static bool isFloatingPointZero(SDValue Op) { | |||
4687 | if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) | |||
4688 | return CFP->getValueAPF().isPosZero(); | |||
4689 | else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { | |||
4690 | // Maybe this has already been legalized into the constant pool? | |||
4691 | if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { | |||
4692 | SDValue WrapperOp = Op.getOperand(1).getOperand(0); | |||
4693 | if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) | |||
4694 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) | |||
4695 | return CFP->getValueAPF().isPosZero(); | |||
4696 | } | |||
4697 | } else if (Op->getOpcode() == ISD::BITCAST && | |||
4698 | Op->getValueType(0) == MVT::f64) { | |||
4699 | // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) | |||
4700 | // created by LowerConstantFP(). | |||
4701 | SDValue BitcastOp = Op->getOperand(0); | |||
4702 | if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && | |||
4703 | isNullConstant(BitcastOp->getOperand(0))) | |||
4704 | return true; | |||
4705 | } | |||
4706 | return false; | |||
4707 | } | |||
4708 | ||||
4709 | /// Returns the appropriate ARM CMP (cmp) node and the corresponding condition | |||
4710 | /// code for the given operands. | |||
4711 | SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, | |||
4712 | SDValue &ARMcc, SelectionDAG &DAG, | |||
4713 | const SDLoc &dl) const { | |||
4714 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { | |||
4715 | unsigned C = RHSC->getZExtValue(); | |||
4716 | if (!isLegalICmpImmediate((int32_t)C)) { | |||
4717 | // Constant does not fit, try adjusting it by one. | |||
4718 | switch (CC) { | |||
4719 | default: break; | |||
4720 | case ISD::SETLT: | |||
4721 | case ISD::SETGE: | |||
4722 | if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { | |||
4723 | CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; | |||
4724 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); | |||
4725 | } | |||
4726 | break; | |||
4727 | case ISD::SETULT: | |||
4728 | case ISD::SETUGE: | |||
4729 | if (C != 0 && isLegalICmpImmediate(C-1)) { | |||
4730 | CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; | |||
4731 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); | |||
4732 | } | |||
4733 | break; | |||
4734 | case ISD::SETLE: | |||
4735 | case ISD::SETGT: | |||
4736 | if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { | |||
4737 | CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; | |||
4738 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); | |||
4739 | } | |||
4740 | break; | |||
4741 | case ISD::SETULE: | |||
4742 | case ISD::SETUGT: | |||
4743 | if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { | |||
4744 | CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; | |||
4745 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); | |||
4746 | } | |||
4747 | break; | |||
4748 | } | |||
4749 | } | |||
4750 | } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && | |||
4751 | (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { | |||
4752 | // In ARM and Thumb-2, the compare instructions can shift their second | |||
4753 | // operand. | |||
4754 | CC = ISD::getSetCCSwappedOperands(CC); | |||
4755 | std::swap(LHS, RHS); | |||
4756 | } | |||
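     | // Worked example of the constant adjustment above (illustrative): 257 | |||
     | // is not a legal compare immediate (its two set bits are 8 apart), but | |||
     | // 256 is, so "x <u 257" is rewritten to "x <=u 256" and a plain | |||
     | // "cmp x, #256" with the LS condition suffices, avoiding materializing | |||
     | // 257 in a register first. | |||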
4757 | ||||
4758 | // Thumb1 has very limited immediate modes, so turning an "and" into a | |||
4759 | // shift can save multiple instructions. | |||
4760 | // | |||
4761 | // If we have (x & C1), and C1 is an appropriate mask, we can transform it | |||
4762 | // into "((x << n) >> n)". But that isn't necessarily profitable on its | |||
4763 | // own. If it's the operand to an unsigned comparison with an immediate, | |||
4764 | // we can eliminate one of the shifts: we transform | |||
4765 | // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)". | |||
4766 | // | |||
4767 | // We avoid transforming cases which aren't profitable due to encoding | |||
4768 | // details: | |||
4769 | // | |||
4770 | // 1. C2 fits into the immediate field of a cmp, and the transformed version | |||
4771 | // would not; in that case, we're essentially trading one immediate load for | |||
4772 | // another. | |||
4773 | // 2. C1 is 255 or 65535, so we can use uxtb or uxth. | |||
4774 | // 3. C2 is zero; we have other code for this special case. | |||
4775 | // | |||
4776 | // FIXME: Figure out profitability for Thumb2; we usually can't save an | |||
4777 | // instruction, since the AND is always one instruction anyway, but we could | |||
4778 | // use narrow instructions in some cases. | |||
4779 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND && | |||
4780 | LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) && | |||
4781 | LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) && | |||
4782 | !isSignedIntSetCC(CC)) { | |||
4783 | unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue(); | |||
4784 | auto *RHSC = cast<ConstantSDNode>(RHS.getNode()); | |||
4785 | uint64_t RHSV = RHSC->getZExtValue(); | |||
4786 | if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { | |||
4787 | unsigned ShiftBits = llvm::countl_zero(Mask); | |||
4788 | if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) { | |||
4789 | SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32); | |||
4790 | LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt); | |||
4791 | RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32); | |||
4792 | } | |||
4793 | } | |||
4794 | } | |||
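     | // Worked example (illustrative): "(x & 0x3ff) == 512" has a low-bit | |||
     | // mask, so Mask = 0x3ff, ShiftBits = countl_zero(0x3ff) = 22, and the | |||
     | // compare becomes "(x << 22) == (512 << 22)", replacing the AND (whose | |||
     | // mask would itself need materializing on Thumb1) with a single shift. | |||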
4795 | ||||
4796 | // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a | |||
4797 | // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same | |||
4798 | // way a cmp would. | |||
4799 | // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and | |||
4800 | // some tweaks to the heuristics for the previous and->shift transform. | |||
4801 | // FIXME: Optimize cases where the LHS isn't a shift. | |||
4802 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && | |||
4803 | isa<ConstantSDNode>(RHS) && | |||
4804 | cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U && | |||
4805 | CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && | |||
4806 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) { | |||
4807 | unsigned ShiftAmt = | |||
4808 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1; | |||
4809 | SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, | |||
4810 | DAG.getVTList(MVT::i32, MVT::i32), | |||
4811 | LHS.getOperand(0), | |||
4812 | DAG.getConstant(ShiftAmt, dl, MVT::i32)); | |||
4813 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, | |||
4814 | Shift.getValue(1), SDValue()); | |||
4815 | ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); | |||
4816 | return Chain.getValue(1); | |||
4817 | } | |||
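     | // E.g. (illustrative): with c = 3, "(x << 3) >u 0x80000000" becomes | |||
     | //   lsls x, x, #4 | |||
     | // plus the HI condition: C receives bit 31 of (x << 3), and Z is set | |||
     | // only if no lower bits survive, which together match the cmp result. | |||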
4818 | ||||
4819 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | |||
4820 | ||||
4821 | // If the RHS is a constant zero then the V (overflow) flag will never be | |||
4822 | // set. This can allow us to simplify GE to PL or LT to MI, which can be | |||
4823 | // simpler for other passes (like the peephole optimiser) to deal with. | |||
4824 | if (isNullConstant(RHS)) { | |||
4825 | switch (CondCode) { | |||
4826 | default: break; | |||
4827 | case ARMCC::GE: | |||
4828 | CondCode = ARMCC::PL; | |||
4829 | break; | |||
4830 | case ARMCC::LT: | |||
4831 | CondCode = ARMCC::MI; | |||
4832 | break; | |||
4833 | } | |||
4834 | } | |||
4835 | ||||
4836 | ARMISD::NodeType CompareType; | |||
4837 | switch (CondCode) { | |||
4838 | default: | |||
4839 | CompareType = ARMISD::CMP; | |||
4840 | break; | |||
4841 | case ARMCC::EQ: | |||
4842 | case ARMCC::NE: | |||
4843 | // Uses only Z Flag | |||
4844 | CompareType = ARMISD::CMPZ; | |||
4845 | break; | |||
4846 | } | |||
4847 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | |||
4848 | return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); | |||
4849 | } | |||
4850 | ||||
4851 | /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. | |||
4852 | SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, | |||
4853 | SelectionDAG &DAG, const SDLoc &dl, | |||
4854 | bool Signaling) const { | |||
4855 |   assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); | |||
4856 | SDValue Cmp; | |||
4857 | if (!isFloatingPointZero(RHS)) | |||
4858 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, | |||
4859 | dl, MVT::Glue, LHS, RHS); | |||
4860 | else | |||
4861 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, | |||
4862 | dl, MVT::Glue, LHS); | |||
4863 | return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); | |||
4864 | } | |||
4865 | ||||
4866 | /// duplicateCmp - Glue values can have only one use, so this function | |||
4867 | /// duplicates a comparison node. | |||
4868 | SDValue | |||
4869 | ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { | |||
4870 | unsigned Opc = Cmp.getOpcode(); | |||
4871 | SDLoc DL(Cmp); | |||
4872 | if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) | |||
4873 | return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); | |||
4874 | ||||
4875 |   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); | |||
4876 | Cmp = Cmp.getOperand(0); | |||
4877 | Opc = Cmp.getOpcode(); | |||
4878 | if (Opc == ARMISD::CMPFP) | |||
4879 | Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); | |||
4880 | else { | |||
4881 |     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); | |||
4882 | Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); | |||
4883 | } | |||
4884 | return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); | |||
4885 | } | |||
4886 | ||||
4887 | // This function returns three things: the arithmetic computation itself | |||
4888 | // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The | |||
4889 | // comparison and the condition code define the case in which the arithmetic | |||
4890 | // computation *does not* overflow. | |||
4891 | std::pair<SDValue, SDValue> | |||
4892 | ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, | |||
4893 | SDValue &ARMcc) const { | |||
4894 |   assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); | |||
4895 | ||||
4896 | SDValue Value, OverflowCmp; | |||
4897 | SDValue LHS = Op.getOperand(0); | |||
4898 | SDValue RHS = Op.getOperand(1); | |||
4899 | SDLoc dl(Op); | |||
4900 | ||||
4901 | // FIXME: We are currently always generating CMPs because we don't support | |||
4902 | // generating CMN through the backend. This is not as good as the natural | |||
4903 | // CMP case because it causes a register dependency and cannot be folded | |||
4904 | // later. | |||
4905 | ||||
4906 | switch (Op.getOpcode()) { | |||
4907 | default: | |||
4908 |     llvm_unreachable("Unknown overflow instruction!"); | |||
4909 | case ISD::SADDO: | |||
4910 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); | |||
4911 | Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); | |||
4912 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); | |||
4913 | break; | |||
4914 | case ISD::UADDO: | |||
4915 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); | |||
4916 | // We use ADDC here to correspond to its use in LowerUnsignedALUO. | |||
4917 | // We do not use it in the USUBO case as Value may not be used. | |||
4918 | Value = DAG.getNode(ARMISD::ADDC, dl, | |||
4919 | DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) | |||
4920 | .getValue(0); | |||
4921 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); | |||
4922 | break; | |||
4923 | case ISD::SSUBO: | |||
4924 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); | |||
4925 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); | |||
4926 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); | |||
4927 | break; | |||
4928 | case ISD::USUBO: | |||
4929 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); | |||
4930 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); | |||
4931 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); | |||
4932 | break; | |||
4933 | case ISD::UMULO: | |||
4934 | // We generate a UMUL_LOHI and then check if the high word is 0. | |||
4935 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); | |||
4936 | Value = DAG.getNode(ISD::UMUL_LOHI, dl, | |||
4937 | DAG.getVTList(Op.getValueType(), Op.getValueType()), | |||
4938 | LHS, RHS); | |||
4939 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), | |||
4940 | DAG.getConstant(0, dl, MVT::i32)); | |||
4941 | Value = Value.getValue(0); // We only want the low 32 bits for the result. | |||
4942 | break; | |||
4943 | case ISD::SMULO: | |||
4944 | // We generate a SMUL_LOHI and then check if all the bits of the high word | |||
4945 | // are the same as the sign bit of the low word. | |||
4946 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); | |||
4947 | Value = DAG.getNode(ISD::SMUL_LOHI, dl, | |||
4948 | DAG.getVTList(Op.getValueType(), Op.getValueType()), | |||
4949 | LHS, RHS); | |||
4950 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), | |||
4951 | DAG.getNode(ISD::SRA, dl, Op.getValueType(), | |||
4952 | Value.getValue(0), | |||
4953 | DAG.getConstant(31, dl, MVT::i32))); | |||
4954 | Value = Value.getValue(0); // We only want the low 32 bits for the result. | |||
4955 | break; | |||
4956 | } // switch (...) | |||
4957 | ||||
4958 | return std::make_pair(Value, OverflowCmp); | |||
4959 | } | |||
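     | // Illustrative shape of the SADDO case (the others are analogous): | |||
     | //   Value       = ADD lhs, rhs | |||
     | //   OverflowCmp = CMP Value, lhs   ; the subtraction recomputes rhs and | |||
     | //                                  ; its V flag mirrors the addition's | |||
     | //   ARMcc       = VC               ; "no overflow" condition | |||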
4960 | ||||
4961 | SDValue | |||
4962 | ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { | |||
4963 | // Let legalize expand this if it isn't a legal type yet. | |||
4964 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) | |||
4965 | return SDValue(); | |||
4966 | ||||
4967 | SDValue Value, OverflowCmp; | |||
4968 | SDValue ARMcc; | |||
4969 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); | |||
4970 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
4971 | SDLoc dl(Op); | |||
4972 | // We use 0 and 1 as false and true values. | |||
4973 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); | |||
4974 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); | |||
4975 | EVT VT = Op.getValueType(); | |||
4976 | ||||
4977 | SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, | |||
4978 | ARMcc, CCR, OverflowCmp); | |||
4979 | ||||
4980 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); | |||
4981 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); | |||
4982 | } | |||
4983 | ||||
4984 | static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, | |||
4985 | SelectionDAG &DAG) { | |||
4986 | SDLoc DL(BoolCarry); | |||
4987 | EVT CarryVT = BoolCarry.getValueType(); | |||
4988 | ||||
4989 | // This converts the boolean value carry into the carry flag by doing | |||
4990 | // ARMISD::SUBC Carry, 1 | |||
4991 | SDValue Carry = DAG.getNode(ARMISD::SUBC, DL, | |||
4992 | DAG.getVTList(CarryVT, MVT::i32), | |||
4993 | BoolCarry, DAG.getConstant(1, DL, CarryVT)); | |||
4994 | return Carry.getValue(1); | |||
4995 | } | |||
4996 | ||||
4997 | static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, | |||
4998 | SelectionDAG &DAG) { | |||
4999 | SDLoc DL(Flags); | |||
5000 | ||||
5001 | // Now convert the carry flag into a boolean carry. We do this | |||
5002 |   // using ARMISD::ADDE 0, 0, Carry. | |||
5003 | return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), | |||
5004 | DAG.getConstant(0, DL, MVT::i32), | |||
5005 | DAG.getConstant(0, DL, MVT::i32), Flags); | |||
5006 | } | |||
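     | // In pseudo-assembly (illustrative; ARM has no zero register, so a | |||
     | // materialized 0 stands in for "zero"): | |||
     | //   boolean -> flag : subs ignored, c, #1   ; C == c (borrow iff c == 0) | |||
     | //   flag -> boolean : adc  b, zero, zero    ; b = 0 + 0 + C | |||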
5007 | ||||
5008 | SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, | |||
5009 | SelectionDAG &DAG) const { | |||
5010 | // Let legalize expand this if it isn't a legal type yet. | |||
5011 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) | |||
5012 | return SDValue(); | |||
5013 | ||||
5014 | SDValue LHS = Op.getOperand(0); | |||
5015 | SDValue RHS = Op.getOperand(1); | |||
5016 | SDLoc dl(Op); | |||
5017 | ||||
5018 | EVT VT = Op.getValueType(); | |||
5019 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); | |||
5020 | SDValue Value; | |||
5021 | SDValue Overflow; | |||
5022 | switch (Op.getOpcode()) { | |||
5023 | default: | |||
5024 |     llvm_unreachable("Unknown overflow instruction!"); | |||
5025 | case ISD::UADDO: | |||
5026 | Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); | |||
5027 | // Convert the carry flag into a boolean value. | |||
5028 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); | |||
5029 | break; | |||
5030 | case ISD::USUBO: { | |||
5031 | Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); | |||
5032 | // Convert the carry flag into a boolean value. | |||
5033 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); | |||
5034 |     // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an | |||
5035 |     // overflow value by computing 1 - C. | |||
5036 | Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
5037 | DAG.getConstant(1, dl, MVT::i32), Overflow); | |||
5038 | break; | |||
5039 | } | |||
5040 | } | |||
5041 | ||||
5042 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); | |||
5043 | } | |||
5044 | ||||
5045 | static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, | |||
5046 | const ARMSubtarget *Subtarget) { | |||
5047 | EVT VT = Op.getValueType(); | |||
5048 | if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) | |||
5049 | return SDValue(); | |||
5050 | if (!VT.isSimple()) | |||
5051 | return SDValue(); | |||
5052 | ||||
5053 | unsigned NewOpcode; | |||
5054 | switch (VT.getSimpleVT().SimpleTy) { | |||
5055 | default: | |||
5056 | return SDValue(); | |||
5057 | case MVT::i8: | |||
5058 | switch (Op->getOpcode()) { | |||
5059 | case ISD::UADDSAT: | |||
5060 | NewOpcode = ARMISD::UQADD8b; | |||
5061 | break; | |||
5062 | case ISD::SADDSAT: | |||
5063 | NewOpcode = ARMISD::QADD8b; | |||
5064 | break; | |||
5065 | case ISD::USUBSAT: | |||
5066 | NewOpcode = ARMISD::UQSUB8b; | |||
5067 | break; | |||
5068 | case ISD::SSUBSAT: | |||
5069 | NewOpcode = ARMISD::QSUB8b; | |||
5070 | break; | |||
     |     default: | |||
     |       llvm_unreachable("Unknown saturating arithmetic opcode!"); | |||
5071 |     } | |||
5072 | break; | |||
5073 | case MVT::i16: | |||
5074 | switch (Op->getOpcode()) { | |||
5075 | case ISD::UADDSAT: | |||
5076 | NewOpcode = ARMISD::UQADD16b; | |||
5077 | break; | |||
5078 | case ISD::SADDSAT: | |||
5079 | NewOpcode = ARMISD::QADD16b; | |||
5080 | break; | |||
5081 | case ISD::USUBSAT: | |||
5082 | NewOpcode = ARMISD::UQSUB16b; | |||
5083 | break; | |||
5084 | case ISD::SSUBSAT: | |||
5085 | NewOpcode = ARMISD::QSUB16b; | |||
5086 | break; | |||
     |     default: | |||
     |       llvm_unreachable("Unknown saturating arithmetic opcode!"); | |||
5087 |     } | |||
5088 | break; | |||
5089 | } | |||
5090 | ||||
5091 | SDLoc dl(Op); | |||
5092 | SDValue Add = | |||
5093 | DAG.getNode(NewOpcode, dl, MVT::i32, | |||
5094 | DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), | |||
5095 | DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); | |||
5096 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); | |||
5097 | } | |||
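     | // Illustrative end-to-end example: "i8 @llvm.sadd.sat(a, b)" becomes | |||
     | //   a32 = sext a;  b32 = sext b | |||
     | //   t   = QADD8b a32, b32   ; roughly a qadd8, saturating each byte lane | |||
     | //   res = trunc t to i8     ; only the bottom lane is meaningful | |||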
5098 | ||||
5099 | SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { | |||
5100 | SDValue Cond = Op.getOperand(0); | |||
5101 | SDValue SelectTrue = Op.getOperand(1); | |||
5102 | SDValue SelectFalse = Op.getOperand(2); | |||
5103 | SDLoc dl(Op); | |||
5104 | unsigned Opc = Cond.getOpcode(); | |||
5105 | ||||
5106 | if (Cond.getResNo() == 1 && | |||
5107 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
5108 | Opc == ISD::USUBO)) { | |||
5109 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) | |||
5110 | return SDValue(); | |||
5111 | ||||
5112 | SDValue Value, OverflowCmp; | |||
5113 | SDValue ARMcc; | |||
5114 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); | |||
5115 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5116 | EVT VT = Op.getValueType(); | |||
5117 | ||||
5118 | return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, | |||
5119 | OverflowCmp, DAG); | |||
5120 | } | |||
5121 | ||||
5122 | // Convert: | |||
5123 | // | |||
5124 | // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) | |||
5125 | // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) | |||
5126 | // | |||
5127 | if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { | |||
5128 | const ConstantSDNode *CMOVTrue = | |||
5129 | dyn_cast<ConstantSDNode>(Cond.getOperand(0)); | |||
5130 | const ConstantSDNode *CMOVFalse = | |||
5131 | dyn_cast<ConstantSDNode>(Cond.getOperand(1)); | |||
5132 | ||||
5133 | if (CMOVTrue && CMOVFalse) { | |||
5134 | unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); | |||
5135 | unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); | |||
5136 | ||||
5137 | SDValue True; | |||
5138 | SDValue False; | |||
5139 | if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { | |||
5140 | True = SelectTrue; | |||
5141 | False = SelectFalse; | |||
5142 | } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { | |||
5143 | True = SelectFalse; | |||
5144 | False = SelectTrue; | |||
5145 | } | |||
5146 | ||||
5147 | if (True.getNode() && False.getNode()) { | |||
5148 | EVT VT = Op.getValueType(); | |||
5149 | SDValue ARMcc = Cond.getOperand(2); | |||
5150 | SDValue CCR = Cond.getOperand(3); | |||
5151 | SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); | |||
5152 |         assert(True.getValueType() == VT); | |||
5153 | return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); | |||
5154 | } | |||
5155 | } | |||
5156 | } | |||
5157 | ||||
5158 | // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the | |||
5159 | // undefined bits before doing a full-word comparison with zero. | |||
5160 | Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, | |||
5161 | DAG.getConstant(1, dl, Cond.getValueType())); | |||
5162 | ||||
5163 | return DAG.getSelectCC(dl, Cond, | |||
5164 | DAG.getConstant(0, dl, Cond.getValueType()), | |||
5165 | SelectTrue, SelectFalse, ISD::SETNE); | |||
5166 | } | |||
5167 | ||||
5168 | static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, | |||
5169 | bool &swpCmpOps, bool &swpVselOps) { | |||
5170 | // Start by selecting the GE condition code for opcodes that return true for | |||
5171 | // 'equality' | |||
5172 | if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || | |||
5173 | CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE) | |||
5174 | CondCode = ARMCC::GE; | |||
5175 | ||||
5176 | // and GT for opcodes that return false for 'equality'. | |||
5177 | else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || | |||
5178 | CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT) | |||
5179 | CondCode = ARMCC::GT; | |||
5180 | ||||
5181 | // Since we are constrained to GE/GT, if the opcode contains 'less', we need | |||
5182 | // to swap the compare operands. | |||
5183 | if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || | |||
5184 | CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT) | |||
5185 | swpCmpOps = true; | |||
5186 | ||||
5187 | // Both GT and GE are ordered comparisons, and return false for 'unordered'. | |||
5188 | // If we have an unordered opcode, we need to swap the operands to the VSEL | |||
5189 | // instruction (effectively negating the condition). | |||
5190 | // | |||
5191 | // This also has the effect of swapping which one of 'less' or 'greater' | |||
5192 | // returns true, so we also swap the compare operands. It also switches | |||
5193 | // whether we return true for 'equality', so we compensate by picking the | |||
5194 | // opposite condition code to our original choice. | |||
5195 | if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || | |||
5196 | CC == ISD::SETUGT) { | |||
5197 | swpCmpOps = !swpCmpOps; | |||
5198 | swpVselOps = !swpVselOps; | |||
5199 | CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; | |||
5200 | } | |||
5201 | ||||
5202 | // 'ordered' is 'anything but unordered', so use the VS condition code and | |||
5203 | // swap the VSEL operands. | |||
5204 | if (CC == ISD::SETO) { | |||
5205 | CondCode = ARMCC::VS; | |||
5206 | swpVselOps = true; | |||
5207 | } | |||
5208 | ||||
5209 | // 'unordered or not equal' is 'anything but equal', so use the EQ condition | |||
5210 | // code and swap the VSEL operands. Also do this if we don't care about the | |||
5211 | // unordered case. | |||
5212 | if (CC == ISD::SETUNE || CC == ISD::SETNE) { | |||
5213 | CondCode = ARMCC::EQ; | |||
5214 | swpVselOps = true; | |||
5215 | } | |||
5216 | } | |||
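     | // Example walk-through (illustrative): for SETOLT we pick GT (false on | |||
     | // equality), then, since the opcode contains 'less', swap the compare | |||
     | // operands, ending up with "compare b, a" + a GT-predicated VSEL and no | |||
     | // VSEL-operand swap. | |||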
5217 | ||||
5218 | SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, | |||
5219 | SDValue TrueVal, SDValue ARMcc, SDValue CCR, | |||
5220 | SDValue Cmp, SelectionDAG &DAG) const { | |||
5221 | if (!Subtarget->hasFP64() && VT == MVT::f64) { | |||
5222 | FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, | |||
5223 | DAG.getVTList(MVT::i32, MVT::i32), FalseVal); | |||
5224 | TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, | |||
5225 | DAG.getVTList(MVT::i32, MVT::i32), TrueVal); | |||
5226 | ||||
5227 | SDValue TrueLow = TrueVal.getValue(0); | |||
5228 | SDValue TrueHigh = TrueVal.getValue(1); | |||
5229 | SDValue FalseLow = FalseVal.getValue(0); | |||
5230 | SDValue FalseHigh = FalseVal.getValue(1); | |||
5231 | ||||
5232 | SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, | |||
5233 | ARMcc, CCR, Cmp); | |||
5234 | SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, | |||
5235 | ARMcc, CCR, duplicateCmp(Cmp, DAG)); | |||
5236 | ||||
5237 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); | |||
5238 | } else { | |||
5239 | return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, | |||
5240 | Cmp); | |||
5241 | } | |||
5242 | } | |||
5243 | ||||
5244 | static bool isGTorGE(ISD::CondCode CC) { | |||
5245 | return CC == ISD::SETGT || CC == ISD::SETGE; | |||
5246 | } | |||
5247 | ||||
5248 | static bool isLTorLE(ISD::CondCode CC) { | |||
5249 | return CC == ISD::SETLT || CC == ISD::SETLE; | |||
5250 | } | |||
5251 | ||||
5252 | // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. | |||
5253 | // All of these conditions (and their <= and >= counterparts) will do: | |||
5254 | // x < k ? k : x | |||
5255 | // x > k ? x : k | |||
5256 | // k < x ? x : k | |||
5257 | // k > x ? k : x | |||
5258 | static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, | |||
5259 | const SDValue TrueVal, const SDValue FalseVal, | |||
5260 | const ISD::CondCode CC, const SDValue K) { | |||
5261 | return (isGTorGE(CC) && | |||
5262 | ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || | |||
5263 | (isLTorLE(CC) && | |||
5264 | ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); | |||
5265 | } | |||
5266 | ||||
5267 | // Check if two chained conditionals could be converted into SSAT or USAT. | |||
5268 | // | |||
5269 | // SSAT can replace a set of two conditional selectors that bound a number to an | |||
5270 | // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: | |||
5271 | // | |||
5272 | // x < -k ? -k : (x > k ? k : x) | |||
5273 | // x < -k ? -k : (x < k ? x : k) | |||
5274 | // x > -k ? (x > k ? k : x) : -k | |||
5275 | // x < k ? (x < -k ? -k : x) : k | |||
5276 | // etc. | |||
5277 | // | |||
5278 | // LLVM canonicalizes these to either a min(max()) or a max(min()) | |||
5279 | // pattern. This function tries to match one of these and will return an SSAT | |||
5280 | // node if successful. | |||
5281 | // | |||
5282 | // USAT works similarly to SSAT but bounds the value to the interval [0, k], | |||
5283 | // where k + 1 is a power of 2. | |||
5284 | static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) { | |||
5285 | EVT VT = Op.getValueType(); | |||
5286 | SDValue V1 = Op.getOperand(0); | |||
5287 | SDValue K1 = Op.getOperand(1); | |||
5288 | SDValue TrueVal1 = Op.getOperand(2); | |||
5289 | SDValue FalseVal1 = Op.getOperand(3); | |||
5290 | ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | |||
5291 | ||||
5292 | const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1; | |||
5293 | if (Op2.getOpcode() != ISD::SELECT_CC) | |||
5294 | return SDValue(); | |||
5295 | ||||
5296 | SDValue V2 = Op2.getOperand(0); | |||
5297 | SDValue K2 = Op2.getOperand(1); | |||
5298 | SDValue TrueVal2 = Op2.getOperand(2); | |||
5299 | SDValue FalseVal2 = Op2.getOperand(3); | |||
5300 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); | |||
5301 | ||||
5302 | SDValue V1Tmp = V1; | |||
5303 | SDValue V2Tmp = V2; | |||
5304 | ||||
5305 | // Check that the registers and the constants match a max(min()) or min(max()) | |||
5306 | // pattern | |||
5307 | if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 || | |||
5308 | K2 != FalseVal2 || | |||
5309 | !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) | |||
5310 | return SDValue(); | |||
5311 | ||||
5312 | // Check that the constant in the lower-bound check is | |||
5313 | // the opposite of the constant in the upper-bound check | |||
5314 | // in 1's complement. | |||
5315 | if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2)) | |||
5316 | return SDValue(); | |||
5317 | ||||
5318 | int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue(); | |||
5319 | int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue(); | |||
5320 | int64_t PosVal = std::max(Val1, Val2); | |||
5321 | int64_t NegVal = std::min(Val1, Val2); | |||
5322 | ||||
5323 | if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || | |||
5324 | !isPowerOf2_64(PosVal + 1)) | |||
5325 | return SDValue(); | |||
5326 | ||||
5327 | // Handle the difference between USAT (unsigned) and SSAT (signed) | |||
5328 | // saturation | |||
5329 | // At this point, PosVal is guaranteed to be positive | |||
5330 | uint64_t K = PosVal; | |||
5331 | SDLoc dl(Op); | |||
5332 | if (Val1 == ~Val2) | |||
5333 | return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp, | |||
5334 | DAG.getConstant(llvm::countr_one(K), dl, VT)); | |||
5335 | if (NegVal == 0) | |||
5336 | return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp, | |||
5337 | DAG.getConstant(llvm::countr_one(K), dl, VT)); | |||
5338 | ||||
5339 | return SDValue(); | |||
5340 | } | |||
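     | // Worked example (illustrative): clamping x to [-128, 127] gives | |||
     | // Val1 = -128, Val2 = 127, PosVal = 127 (so PosVal + 1 is a power of 2) | |||
     | // and Val1 == ~Val2, so we emit ARMISD::SSAT with countr_one(127) = 7, | |||
     | // which the instruction patterns turn into an 8-bit signed saturate | |||
     | // (roughly "ssat Rd, #8, Rn"). A [0, 255] clamp similarly becomes USAT. | |||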
5341 | ||||
5342 | // Check if a condition of the type x < k ? k : x can be converted into a | |||
5343 | // bit operation instead of conditional moves. | |||
5344 | // Currently this is allowed given: | |||
5345 | // - The conditions and values match up | |||
5346 | // - k is 0 or -1 (all ones) | |||
5347 | // This function will not check the last condition; that's up to the caller. | |||
5348 | // It returns true if the transformation can be made, and in that case | |||
5349 | // returns x in V and k in SatK. | |||
5350 | static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, | |||
5351 | SDValue &SatK) | |||
5352 | { | |||
5353 | SDValue LHS = Op.getOperand(0); | |||
5354 | SDValue RHS = Op.getOperand(1); | |||
5355 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | |||
5356 | SDValue TrueVal = Op.getOperand(2); | |||
5357 | SDValue FalseVal = Op.getOperand(3); | |||
5358 | ||||
5359 | SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS) | |||
5360 | ? &RHS | |||
5361 | : nullptr; | |||
5362 | ||||
5363 |   // No constant operand in the comparison; early out. | |||
5364 | if (!K) | |||
5365 | return false; | |||
5366 | ||||
5367 | SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal; | |||
5368 | V = (KTmp == TrueVal) ? FalseVal : TrueVal; | |||
5369 | SDValue VTmp = (K && *K == LHS) ? RHS : LHS; | |||
5370 | ||||
5371 |   // If the constants on the two sides, or the variables on the two sides, | |||
5372 |   // do not match, early out. | |||
5373 | if (*K != KTmp || V != VTmp) | |||
5374 | return false; | |||
5375 | ||||
5376 | if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) { | |||
5377 | SatK = *K; | |||
5378 | return true; | |||
5379 | } | |||
5380 | ||||
5381 | return false; | |||
5382 | } | |||
5383 | ||||
5384 | bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const { | |||
5385 | if (VT == MVT::f32) | |||
5386 | return !Subtarget->hasVFP2Base(); | |||
5387 | if (VT == MVT::f64) | |||
5388 | return !Subtarget->hasFP64(); | |||
5389 | if (VT == MVT::f16) | |||
5390 | return !Subtarget->hasFullFP16(); | |||
5391 | return false; | |||
5392 | } | |||
5393 | ||||
5394 | SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { | |||
5395 | EVT VT = Op.getValueType(); | |||
5396 | SDLoc dl(Op); | |||
5397 | ||||
5398 | // Try to convert two saturating conditional selects into a single SSAT | |||
5399 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) | |||
5400 | if (SDValue SatValue = LowerSaturatingConditional(Op, DAG)) | |||
5401 | return SatValue; | |||
5402 | ||||
5403 |   // Try to convert expressions of the form x < k ? k : x (and similar forms) | |||
5404 |   // into more efficient bit operations, which is possible when k is 0 or -1. | |||
5405 |   // On ARM and Thumb-2, which have a flexible second operand, this will result | |||
5406 |   // in a single instruction. On Thumb the shift and the bit operation will be | |||
5407 |   // two instructions. | |||
5408 |   // Only allow this transformation on full-width (32-bit) operations. | |||
5409 | SDValue LowerSatConstant; | |||
5410 | SDValue SatValue; | |||
5411 | if (VT == MVT::i32 && | |||
5412 | isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) { | |||
5413 | SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, | |||
5414 | DAG.getConstant(31, dl, VT)); | |||
5415 | if (isNullConstant(LowerSatConstant)) { | |||
5416 | SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV, | |||
5417 | DAG.getAllOnesConstant(dl, VT)); | |||
5418 | return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV); | |||
5419 | } else if (isAllOnesConstant(LowerSatConstant)) | |||
5420 | return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV); | |||
5421 | } | |||
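     | // E.g. (illustrative): "x < 0 ? 0 : x" takes the isNullConstant path | |||
     | // and becomes x & ~(x >> 31), roughly "bic Rd, Rn, Rn, asr #31" on ARM; | |||
     | // "x < -1 ? -1 : x" becomes x | (x >> 31) via the all-ones path. | |||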
5422 | ||||
5423 | SDValue LHS = Op.getOperand(0); | |||
5424 | SDValue RHS = Op.getOperand(1); | |||
5425 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | |||
5426 | SDValue TrueVal = Op.getOperand(2); | |||
5427 | SDValue FalseVal = Op.getOperand(3); | |||
5428 | ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal); | |||
5429 | ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal); | |||
5430 | ||||
5431 | if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && | |||
5432 | LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) { | |||
5433 | unsigned TVal = CTVal->getZExtValue(); | |||
5434 | unsigned FVal = CFVal->getZExtValue(); | |||
5435 | unsigned Opcode = 0; | |||
5436 | ||||
5437 | if (TVal == ~FVal) { | |||
5438 | Opcode = ARMISD::CSINV; | |||
5439 | } else if (TVal == ~FVal + 1) { | |||
5440 | Opcode = ARMISD::CSNEG; | |||
5441 | } else if (TVal + 1 == FVal) { | |||
5442 | Opcode = ARMISD::CSINC; | |||
5443 | } else if (TVal == FVal + 1) { | |||
5444 | Opcode = ARMISD::CSINC; | |||
5445 | std::swap(TrueVal, FalseVal); | |||
5446 | std::swap(TVal, FVal); | |||
5447 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | |||
5448 | } | |||
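     | // Concretely (illustrative): select(c, 5, ~5) maps to CSINV, | |||
     | // select(c, 5, -5) to CSNEG, and select(c, 5, 6) to CSINC; the | |||
     | // TVal == FVal + 1 case becomes CSINC after swapping the operands and | |||
     | // inverting the condition. | |||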
5449 | ||||
5450 | if (Opcode) { | |||
5451 | // If one of the constants is cheaper than another, materialise the | |||
5452 | // cheaper one and let the csel generate the other. | |||
5453 | if (Opcode != ARMISD::CSINC && | |||
5454 | HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { | |||
5455 | std::swap(TrueVal, FalseVal); | |||
5456 | std::swap(TVal, FVal); | |||
5457 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | |||
5458 | } | |||
5459 | ||||
5460 | // Attempt to use ZR, checking whether TVal is 0, possibly inverting the | |||
5461 | // condition to get there. CSINC is not invertible like the other two | |||
5462 | // (~(~a) == a, -(-a) == a, but (a+1)+1 != a). | |||
5463 | if (FVal == 0 && Opcode != ARMISD::CSINC) { | |||
5464 | std::swap(TrueVal, FalseVal); | |||
5465 | std::swap(TVal, FVal); | |||
5466 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | |||
5467 | } | |||
5468 | ||||
5469 | // Drop FVal, since we can recover it by inverting/negating TVal. | |||
5470 | FalseVal = TrueVal; | |||
5471 | ||||
5472 | SDValue ARMcc; | |||
5473 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | |||
5474 | EVT VT = TrueVal.getValueType(); | |||
5475 | return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp); | |||
5476 | } | |||
5477 | } | |||
5478 | ||||
5479 | if (isUnsupportedFloatingType(LHS.getValueType())) { | |||
5480 | DAG.getTargetLoweringInfo().softenSetCCOperands( | |||
5481 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); | |||
5482 | ||||
5483 | // If softenSetCCOperands only returned one value, we should compare it to | |||
5484 | // zero. | |||
5485 | if (!RHS.getNode()) { | |||
5486 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); | |||
5487 | CC = ISD::SETNE; | |||
5488 | } | |||
5489 | } | |||
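     | // For instance, an f64 SETEQ softened above typically becomes a call | |||
     | // such as __aeabi_dcmpeq, whose i32 result is then compared SETNE | |||
     | // against zero. | |||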
5490 | ||||
5491 | if (LHS.getValueType() == MVT::i32) { | |||
5492 | // Try to generate VSEL on ARMv8. | |||
5493 | // The VSEL instruction can't use all the usual ARM condition | |||
5494 | // codes: it only has two bits to select the condition code, so it's | |||
5495 | // constrained to use only GE, GT, VS and EQ. | |||
5496 | // | |||
5497 | // To implement all the various ISD::SETXXX opcodes, we sometimes need to | |||
5498 | // swap the operands of the previous compare instruction (effectively | |||
5499 | // inverting the compare condition, swapping 'less' and 'greater') and | |||
5500 | // sometimes need to swap the operands to the VSEL (which inverts the | |||
5501 | // condition in the sense of firing whenever the previous condition didn't). | |||
5502 | if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 || | |||
5503 | TrueVal.getValueType() == MVT::f32 || | |||
5504 | TrueVal.getValueType() == MVT::f64)) { | |||
5505 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | |||
5506 | if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || | |||
5507 | CondCode == ARMCC::VC || CondCode == ARMCC::NE) { | |||
5508 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | |||
5509 | std::swap(TrueVal, FalseVal); | |||
5510 | } | |||
5511 | } | |||
5512 | ||||
5513 | SDValue ARMcc; | |||
5514 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5515 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | |||
5516 | // Choose GE over PL, which vsel does not support. | |||
5517 | if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL) | |||
5518 | ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); | |||
5519 | return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); | |||
5520 | } | |||
5521 | ||||
5522 | ARMCC::CondCodes CondCode, CondCode2; | |||
5523 | FPCCToARMCC(CC, CondCode, CondCode2); | |||
5524 | ||||
5525 | // Normalize the fp compare. If RHS is zero we prefer to keep it there so we | |||
5526 | // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we | |||
5527 | // must use VSEL (limited condition codes), due to not having conditional f16 | |||
5528 | // moves. | |||
5529 | if (Subtarget->hasFPARMv8Base() && | |||
5530 | !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) && | |||
5531 | (TrueVal.getValueType() == MVT::f16 || | |||
5532 | TrueVal.getValueType() == MVT::f32 || | |||
5533 | TrueVal.getValueType() == MVT::f64)) { | |||
5534 | bool swpCmpOps = false; | |||
5535 | bool swpVselOps = false; | |||
5536 | checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); | |||
5537 | ||||
5538 | if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || | |||
5539 | CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { | |||
5540 | if (swpCmpOps) | |||
5541 | std::swap(LHS, RHS); | |||
5542 | if (swpVselOps) | |||
5543 | std::swap(TrueVal, FalseVal); | |||
5544 | } | |||
5545 | } | |||
5546 | ||||
5547 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | |||
5548 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); | |||
5549 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5550 | SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); | |||
5551 | if (CondCode2 != ARMCC::AL) { | |||
5552 | SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); | |||
5553 | // FIXME: Needs another CMP because flag can have but one use. | |||
5554 | SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); | |||
5555 | Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); | |||
5556 | } | |||
5557 | return Result; | |||
5558 | } | |||
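     | // A second CMOV is needed when the FP condition maps to two ARM | |||
     | // condition codes; e.g. SETUEQ (unordered or equal) lowers as EQ plus | |||
     | // VS, with the first CMOV's result feeding the second as its false | |||
     | // value. | |||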
5559 | ||||
5560 | /// canChangeToInt - Given the fp compare operand, return true if it is suitable | |||
5561 | /// to morph to an integer compare sequence. | |||
5562 | static bool canChangeToInt(SDValue Op, bool &SeenZero, | |||
5563 | const ARMSubtarget *Subtarget) { | |||
5564 | SDNode *N = Op.getNode(); | |||
5565 | if (!N->hasOneUse()) | |||
5566 | // Otherwise it requires moving the value from fp to integer registers. | |||
5567 | return false; | |||
5568 | if (!N->getNumValues()) | |||
5569 | return false; | |||
5570 | EVT VT = Op.getValueType(); | |||
5571 | if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) | |||
5572 | // f32 case is generally profitable. f64 case only makes sense when vcmpe + | |||
5573 | // vmrs are very slow, e.g. cortex-a8. | |||
5574 | return false; | |||
5575 | ||||
5576 | if (isFloatingPointZero(Op)) { | |||
5577 | SeenZero = true; | |||
5578 | return true; | |||
5579 | } | |||
5580 | return ISD::isNormalLoad(N); | |||
5581 | } | |||
5582 | ||||
5583 | static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { | |||
5584 | if (isFloatingPointZero(Op)) | |||
5585 | return DAG.getConstant(0, SDLoc(Op), MVT::i32); | |||
5586 | ||||
5587 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) | |||
5588 | return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), | |||
5589 | Ld->getPointerInfo(), Ld->getAlign(), | |||
5590 | Ld->getMemOperand()->getFlags()); | |||
5591 | ||||
5592 | llvm_unreachable("Unknown VFP cmp argument!"); | |||
5593 | } | |||
5594 | ||||
5595 | static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, | |||
5596 | SDValue &RetVal1, SDValue &RetVal2) { | |||
5597 | SDLoc dl(Op); | |||
5598 | ||||
5599 | if (isFloatingPointZero(Op)) { | |||
5600 | RetVal1 = DAG.getConstant(0, dl, MVT::i32); | |||
5601 | RetVal2 = DAG.getConstant(0, dl, MVT::i32); | |||
5602 | return; | |||
5603 | } | |||
5604 | ||||
5605 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { | |||
5606 | SDValue Ptr = Ld->getBasePtr(); | |||
5607 | RetVal1 = | |||
5608 | DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), | |||
5609 | Ld->getAlign(), Ld->getMemOperand()->getFlags()); | |||
5610 | ||||
5611 | EVT PtrType = Ptr.getValueType(); | |||
5612 | SDValue NewPtr = DAG.getNode(ISD::ADD, dl, | |||
5613 | PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); | |||
5614 | RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, | |||
5615 | Ld->getPointerInfo().getWithOffset(4), | |||
5616 | commonAlignment(Ld->getAlign(), 4), | |||
5617 | Ld->getMemOperand()->getFlags()); | |||
5618 | return; | |||
5619 | } | |||
5620 | ||||
5621 | llvm_unreachable("Unknown VFP cmp argument!"); | |||
5622 | } | |||
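     | // On little-endian ARM the load at offset 4 above is the half holding | |||
     | // the sign and exponent bits, which is why the caller masks RetVal2 | |||
     | // (not RetVal1) with 0x7fffffff before comparing. | |||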
5623 | ||||
5624 | /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some | |||
5625 | /// f32 and even f64 comparisons to integer ones. | |||
5626 | SDValue | |||
5627 | ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { | |||
5628 | SDValue Chain = Op.getOperand(0); | |||
5629 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); | |||
5630 | SDValue LHS = Op.getOperand(2); | |||
5631 | SDValue RHS = Op.getOperand(3); | |||
5632 | SDValue Dest = Op.getOperand(4); | |||
5633 | SDLoc dl(Op); | |||
5634 | ||||
5635 | bool LHSSeenZero = false; | |||
5636 | bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); | |||
5637 | bool RHSSeenZero = false; | |||
5638 | bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); | |||
5639 | if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { | |||
5640 | // If unsafe fp math optimization is enabled and there are no other uses of | |||
5641 | // the CMP operands, and the condition code is EQ or NE, we can optimize it | |||
5642 | // to an integer comparison. | |||
5643 | if (CC == ISD::SETOEQ) | |||
5644 | CC = ISD::SETEQ; | |||
5645 | else if (CC == ISD::SETUNE) | |||
5646 | CC = ISD::SETNE; | |||
5647 | ||||
5648 | SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); | |||
5649 | SDValue ARMcc; | |||
5650 | if (LHS.getValueType() == MVT::f32) { | |||
5651 | LHS = DAG.getNode(ISD::AND, dl, MVT::i32, | |||
5652 | bitcastf32Toi32(LHS, DAG), Mask); | |||
5653 | RHS = DAG.getNode(ISD::AND, dl, MVT::i32, | |||
5654 | bitcastf32Toi32(RHS, DAG), Mask); | |||
5655 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | |||
5656 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5657 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, | |||
5658 | Chain, Dest, ARMcc, CCR, Cmp); | |||
5659 | } | |||
5660 | ||||
5661 | SDValue LHS1, LHS2; | |||
5662 | SDValue RHS1, RHS2; | |||
5663 | expandf64Toi32(LHS, DAG, LHS1, LHS2); | |||
5664 | expandf64Toi32(RHS, DAG, RHS1, RHS2); | |||
5665 | LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); | |||
5666 | RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); | |||
5667 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | |||
5668 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | |||
5669 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); | |||
5670 | SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; | |||
5671 | return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); | |||
5672 | } | |||
5673 | ||||
5674 | return SDValue(); | |||
5675 | } | |||
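     | // Worked example (illustrative): under unsafe-fp-math, "br (x == 0.0f)" | |||
     | // becomes an integer compare of (bits(x) & 0x7fffffff) against 0, so | |||
     | // both +0.0 (0x00000000) and -0.0 (0x80000000) still take the branch. | |||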
5676 | ||||
5677 | SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { | |||
5678 | SDValue Chain = Op.getOperand(0); | |||
5679 | SDValue Cond = Op.getOperand(1); | |||
5680 | SDValue Dest = Op.getOperand(2); | |||
5681 | SDLoc dl(Op); | |||
5682 | ||||
5683 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch | |||
5684 | // instruction. | |||
5685 | unsigned Opc = Cond.getOpcode(); | |||
5686 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && | |||
5687 | !Subtarget->isThumb1Only(); | |||
5688 | if (Cond.getResNo() == 1 && | |||
5689 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
5690 | Opc == ISD::USUBO || OptimizeMul)) { | |||
5691 | // Only lower legal XALUO ops. | |||
5692 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) | |||
5693 | return SDValue(); | |||
5694 | ||||
5695 | // The actual operation with overflow check. | |||
5696 | SDValue Value, OverflowCmp; | |||
5697 | SDValue ARMcc; | |||
5698 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); | |||
5699 | ||||
5700 | // Reverse the condition code. | |||
5701 | ARMCC::CondCodes CondCode = | |||
5702 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); | |||
5703 | CondCode = ARMCC::getOppositeCondition(CondCode); | |||
5704 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); | |||
5705 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5706 | ||||
5707 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, | |||
5708 | OverflowCmp); | |||
5709 | } | |||
5710 | ||||
5711 | return SDValue(); | |||
5712 | } | |||
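     | // Illustrative: a branch on the overflow result of | |||
     | // llvm.sadd.with.overflow emits the flag-setting compare from | |||
     | // getARMXALUOOp and branches on the reversed condition, since the | |||
     | // helper's condition encodes the no-overflow case. | |||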
5713 | ||||
5714 | SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { | |||
5715 | SDValue Chain = Op.getOperand(0); | |||
5716 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); | |||
5717 | SDValue LHS = Op.getOperand(2); | |||
5718 | SDValue RHS = Op.getOperand(3); | |||
5719 | SDValue Dest = Op.getOperand(4); | |||
5720 | SDLoc dl(Op); | |||
5721 | ||||
5722 | if (isUnsupportedFloatingType(LHS.getValueType())) { | |||
5723 | DAG.getTargetLoweringInfo().softenSetCCOperands( | |||
5724 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); | |||
5725 | ||||
5726 | // If softenSetCCOperands only returned one value, we should compare it to | |||
5727 | // zero. | |||
5728 | if (!RHS.getNode()) { | |||
5729 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); | |||
5730 | CC = ISD::SETNE; | |||
5731 | } | |||
5732 | } | |||
5733 | ||||
5734 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch | |||
5735 | // instruction. | |||
5736 | unsigned Opc = LHS.getOpcode(); | |||
5737 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && | |||
5738 | !Subtarget->isThumb1Only(); | |||
5739 | if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && | |||
5740 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
5741 | Opc == ISD::USUBO || OptimizeMul) && | |||
5742 | (CC == ISD::SETEQ || CC == ISD::SETNE)) { | |||
5743 | // Only lower legal XALUO ops. | |||
5744 | if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) | |||
5745 | return SDValue(); | |||
5746 | ||||
5747 | // The actual operation with overflow check. | |||
5748 | SDValue Value, OverflowCmp; | |||
5749 | SDValue ARMcc; | |||
5750 | std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); | |||
5751 | ||||
5752 | if ((CC == ISD::SETNE) != isOneConstant(RHS)) { | |||
5753 | // Reverse the condition code. | |||
5754 | ARMCC::CondCodes CondCode = | |||
5755 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); | |||
5756 | CondCode = ARMCC::getOppositeCondition(CondCode); | |||
5757 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); | |||
5758 | } | |||
5759 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5760 | ||||
5761 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, | |||
5762 | OverflowCmp); | |||
5763 | } | |||
5764 | ||||
5765 | if (LHS.getValueType() == MVT::i32) { | |||
5766 | SDValue ARMcc; | |||
5767 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | |||
5768 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5769 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, | |||
5770 | Chain, Dest, ARMcc, CCR, Cmp); | |||
5771 | } | |||
5772 | ||||
5773 | if (getTargetMachine().Options.UnsafeFPMath && | |||
5774 | (CC == ISD::SETEQ || CC == ISD::SETOEQ || | |||
5775 | CC == ISD::SETNE || CC == ISD::SETUNE)) { | |||
5776 | if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) | |||
5777 | return Result; | |||
5778 | } | |||
5779 | ||||
5780 | ARMCC::CondCodes CondCode, CondCode2; | |||
5781 | FPCCToARMCC(CC, CondCode, CondCode2); | |||
5782 | ||||
5783 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | |||
5784 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); | |||
5785 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | |||
5786 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); | |||
5787 | SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; | |||
5788 | SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); | |||
5789 | if (CondCode2 != ARMCC::AL) { | |||
5790 | ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); | |||
5791 | SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; | |||
5792 | Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); | |||
5793 | } | |||
5794 | return Res; | |||
5795 | } | |||
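     | // As with SELECT_CC, some FP conditions need two ARM condition codes | |||
     | // and thus two conditional branches; e.g. SETONE (ordered and not | |||
     | // equal) is emitted as an MI branch followed by a GT branch. | |||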
5796 | ||||
5797 | SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { | |||
5798 | SDValue Chain = Op.getOperand(0); | |||
5799 | SDValue Table = Op.getOperand(1); | |||
5800 | SDValue Index = Op.getOperand(2); | |||
5801 | SDLoc dl(Op); | |||
5802 | ||||
5803 | EVT PTy = getPointerTy(DAG.getDataLayout()); | |||
5804 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); | |||
5805 | SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); | |||
5806 | Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); | |||
5807 | Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); | |||
5808 | SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); | |||
5809 | if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { | |||
5810 | // Thumb2 and ARMv8-M use a two-level jump: the code first jumps into the | |||
5811 | // jump table, which does another jump to the destination. This also makes | |||
5812 | // it easier to translate to TBB / TBH later (Thumb2 only). | |||
5813 | // FIXME: This might not work if the function is extremely large. | |||
5814 | return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, | |||
5815 | Addr, Op.getOperand(2), JTI); | |||
5816 | } | |||
5817 | if (isPositionIndependent() || Subtarget->isROPI()) { | |||
5818 | Addr = | |||
5819 | DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, | |||
5820 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); | |||
5821 | Chain = Addr.getValue(1); | |||
5822 | Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); | |||
5823 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); | |||
5824 | } else { | |||
5825 | Addr = | |||
5826 | DAG.getLoad(PTy, dl, Chain, Addr, | |||
5827 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); | |||
5828 | Chain = Addr.getValue(1); | |||
5829 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); | |||
5830 | } | |||
5831 | } | |||
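     | // In the position-independent case above, jump-table entries hold | |||
     | // offsets relative to the table base rather than absolute addresses, | |||
     | // hence the extra ADD of Table after the load. | |||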
5832 | ||||
5833 | static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { | |||
5834 | EVT VT = Op.getValueType(); | |||
5835 | SDLoc dl(Op); | |||
5836 | ||||
5837 | if (Op.getValueType().getVectorElementType() == MVT::i32) { | |||
5838 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) | |||
5839 | return Op; | |||
5840 | return DAG.UnrollVectorOp(Op.getNode()); | |||
5841 | } | |||
5842 | ||||
5843 | const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); | |||
5844 | ||||
5845 | EVT NewTy; | |||
5846 | const EVT OpTy = Op.getOperand(0).getValueType(); | |||
5847 | if (OpTy == MVT::v4f32) | |||
5848 | NewTy = MVT::v4i32; | |||
5849 | else if (OpTy == MVT::v4f16 && HasFullFP16) | |||
5850 | NewTy = MVT::v4i16; | |||
5851 | else if (OpTy == MVT::v8f16 && HasFullFP16) | |||
5852 | NewTy = MVT::v8i16; | |||
5853 | else | |||
5854 | llvm_unreachable("Invalid type for custom lowering!"); | |||
5855 | ||||
5856 | if (VT != MVT::v4i16 && VT != MVT::v8i16) | |||
5857 | return DAG.UnrollVectorOp(Op.getNode()); | |||
5858 | ||||
5859 | Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); | |||
5860 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); | |||
5861 | } | |||
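     | // E.g. (illustrative) fptosi v4f32 -> v4i16 is lowered above as a | |||
     | // v4f32 -> v4i32 conversion followed by a truncate to v4i16. | |||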
5862 | ||||
5863 | SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { | |||
5864 | EVT VT = Op.getValueType(); | |||
5865 | if (VT.isVector()) | |||
5866 | return LowerVectorFP_TO_INT(Op, DAG); | |||
5867 | ||||
5868 | bool IsStrict = Op->isStrictFPOpcode(); | |||
5869 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); | |||
5870 | ||||
5871 | if (isUnsupportedFloatingType(SrcVal.getValueType())) { | |||
5872 | RTLIB::Libcall LC; | |||
5873 | if (Op.getOpcode() == ISD::FP_TO_SINT || | |||
5874 | Op.getOpcode() == ISD::STRICT_FP_TO_SINT) | |||
5875 | LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), | |||
5876 | Op.getValueType()); | |||
5877 | else | |||
5878 | LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), | |||
5879 | Op.getValueType()); | |||
5880 | SDLoc Loc(Op); | |||
5881 | MakeLibCallOptions CallOptions; | |||
5882 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); | |||
5883 | SDValue Result; | |||
5884 | std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal, | |||
5885 | CallOptions, Loc, Chain); | |||
5886 | return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result; | |||
5887 | } | |||
5888 | ||||
5889 | // FIXME: Remove this when we have strict fp instruction selection patterns | |||
5890 | if (IsStrict) { | |||
5891 | SDLoc Loc(Op); | |||
5892 | SDValue Result = | |||
5893 | DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT | |||
5894 | : ISD::FP_TO_UINT, | |||
5895 | Loc, Op.getValueType(), SrcVal); | |||
5896 | return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc); | |||
5897 | } | |||
5898 | ||||
5899 | return Op; | |||
5900 | } | |||
5901 | ||||
5902 | static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, | |||
5903 | const ARMSubtarget *Subtarget) { | |||
5904 | EVT VT = Op.getValueType(); | |||
5905 | EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); | |||
5906 | EVT FromVT = Op.getOperand(0).getValueType(); | |||
5907 | ||||
5908 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32) | |||
5909 | return Op; | |||
5910 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 && | |||
5911 | Subtarget->hasFP64()) | |||
5912 | return Op; | |||
5913 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 && | |||
5914 | Subtarget->hasFullFP16()) | |||
5915 | return Op; | |||
5916 | if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 && | |||
5917 | Subtarget->hasMVEFloatOps()) | |||
5918 | return Op; | |||
5919 | if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 && | |||
5920 | Subtarget->hasMVEFloatOps()) | |||
5921 | return Op; | |||
5922 | ||||
5923 | if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16) | |||
5924 | return SDValue(); | |||
5925 | ||||
5926 | SDLoc DL(Op); | |||
5927 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; | |||
5928 | unsigned BW = ToVT.getScalarSizeInBits() - IsSigned; | |||
5929 | SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), | |||
5930 | DAG.getValueType(VT.getScalarType())); | |||
5931 | SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT, | |||
5932 | DAG.getConstant((1 << BW) - 1, DL, VT)); | |||
5933 | if (IsSigned) | |||
5934 | Max = DAG.getNode(ISD::SMAX, DL, VT, Max, | |||
5935 | DAG.getConstant(-(1 << BW), DL, VT)); | |||
5936 | return Max; | |||
5937 | } | |||
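     | // Worked example (illustrative): a signed fptosi.sat to i8 from v8f16 | |||
     | // (VT = v8i16) gives BW = 7, so the plain conversion is clamped with | |||
     | // SMIN against 127 and SMAX against -128. | |||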
5938 | ||||
5939 | static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { | |||
5940 | EVT VT = Op.getValueType(); | |||
5941 | SDLoc dl(Op); | |||
5942 | ||||
5943 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { | |||
5944 | if (VT.getVectorElementType() == MVT::f32) | |||
5945 | return Op; | |||
5946 | return DAG.UnrollVectorOp(Op.getNode()); | |||
5947 | } | |||
5948 | ||||
5949 | assert((Op.getOperand(0).getValueType() == MVT::v4i16 || | |||
5950 |         Op.getOperand(0).getValueType() == MVT::v8i16) && | |||
5951 |        "Invalid type for custom lowering!"); | |||
5952 | ||||
5953 | const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); | |||
5954 | ||||
5955 | EVT DestVecType; | |||
5956 | if (VT == MVT::v4f32) | |||
5957 | DestVecType = MVT::v4i32; | |||
5958 | else if (VT == MVT::v4f16 && HasFullFP16) | |||
5959 | DestVecType = MVT::v4i16; | |||
5960 | else if (VT == MVT::v8f16 && HasFullFP16) | |||
5961 | DestVecType = MVT::v8i16; | |||
5962 | else | |||
5963 | return DAG.UnrollVectorOp(Op.getNode()); | |||
5964 | ||||
5965 | unsigned CastOpc; | |||
5966 | unsigned Opc; | |||
5967 | switch (Op.getOpcode()) { | |||
5968 | default: llvm_unreachable("Invalid opcode!"); | |||
5969 | case ISD::SINT_TO_FP: | |||
5970 | CastOpc = ISD::SIGN_EXTEND; | |||
5971 | Opc = ISD::SINT_TO_FP; | |||
5972 | break; | |||
5973 | case ISD::UINT_TO_FP: | |||
5974 | CastOpc = ISD::ZERO_EXTEND; | |||
5975 | Opc = ISD::UINT_TO_FP; | |||
5976 | break; | |||
5977 | } | |||
5978 | ||||
5979 | Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0)); | |||
5980 | return DAG.getNode(Opc, dl, VT, Op); | |||
5981 | } | |||
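     | // E.g. (illustrative) sitofp v4i16 -> v4f32 becomes a sign extend to | |||
     | // v4i32 followed by a v4i32 -> v4f32 conversion. | |||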
5982 | ||||
5983 | SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { | |||
5984 | EVT VT = Op.getValueType(); | |||
5985 | if (VT.isVector()) | |||
5986 | return LowerVectorINT_TO_FP(Op, DAG); | |||
5987 | if (isUnsupportedFloatingType(VT)) { | |||
5988 | RTLIB::Libcall LC; | |||
5989 | if (Op.getOpcode() == ISD::SINT_TO_FP) | |||
5990 | LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), | |||
5991 | Op.getValueType()); | |||
5992 | else | |||
5993 | LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), | |||
5994 | Op.getValueType()); | |||
5995 | MakeLibCallOptions CallOptions; | |||
5996 | return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), | |||
5997 | CallOptions, SDLoc(Op)).first; | |||
5998 | } | |||
5999 | ||||
6000 | return Op; | |||
6001 | } | |||
6002 | ||||
6003 | SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { | |||
6004 | // Implement fcopysign with a fabs and a conditional fneg. | |||
6005 | SDValue Tmp0 = Op.getOperand(0); | |||
6006 | SDValue Tmp1 = Op.getOperand(1); | |||
6007 | SDLoc dl(Op); | |||
6008 | EVT VT = Op.getValueType(); | |||
6009 | EVT SrcVT = Tmp1.getValueType(); | |||
6010 | bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || | |||
6011 | Tmp0.getOpcode() == ARMISD::VMOVDRR; | |||
6012 | bool UseNEON = !InGPR && Subtarget->hasNEON(); | |||
6013 | ||||
6014 | if (UseNEON) { | |||
6015 | // Use VBSL to copy the sign bit. | |||
6016 | unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80); | |||
6017 | SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, | |||
6018 | DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); | |||
6019 | EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; | |||
6020 | if (VT == MVT::f64) | |||
6021 | Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT, | |||
6022 | DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), | |||
6023 | DAG.getConstant(32, dl, MVT::i32)); | |||
6024 | else /*if (VT == MVT::f32)*/ | |||
6025 | Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); | |||
6026 | if (SrcVT == MVT::f32) { | |||
6027 | Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); | |||
6028 | if (VT == MVT::f64) | |||
6029 | Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT, | |||
6030 | DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), | |||
6031 | DAG.getConstant(32, dl, MVT::i32)); | |||
6032 | } else if (VT == MVT::f32) | |||
6033 | Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64, | |||
6034 | DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), | |||
6035 | DAG.getConstant(32, dl, MVT::i32)); | |||
6036 | Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); | |||